Diffstat (limited to 'vendor/github.com/cloudwego/iasm')
21 files changed, 115594 insertions, 0 deletions
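The first vendored package below, expr, is a small integer-expression engine: a pooled AST (ast.go), an evaluator (ops.go), and a recursive-descent parser (parser.go). For orientation, a minimal sketch of how its exported API can be driven; it uses only names visible in this diff (Int, Add, Mul, Evaluate, Free, Parser.SetSource, Parser.Parse), and the enclosing main function is illustrative, not part of the vendored code:

    package main

    import (
        "fmt"

        "github.com/cloudwego/iasm/expr"
    )

    func main() {
        // Build (2 + 3) * 4 programmatically; nodes come from an internal sync.Pool.
        e := expr.Int(2).Add(expr.Int(3)).Mul(expr.Int(4))
        v, err := e.Evaluate()
        fmt.Println(v, err) // 20 <nil>
        e.Free()            // returns the whole tree to the pool; e must not be used afterwards

        // Or parse the same expression from source. A nil Repository means
        // name references cannot be resolved and fail with "unresolved symbol".
        ast, err := new(expr.Parser).SetSource("(2 + 3) * 4").Parse(nil)
        if err != nil {
            panic(err)
        }
        v, _ = ast.Evaluate()
        fmt.Println(v) // 20
        ast.Free()
    }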
diff --git a/vendor/github.com/cloudwego/iasm/LICENSE-APACHE b/vendor/github.com/cloudwego/iasm/LICENSE-APACHE new file mode 100644 index 000000000..66a27ec5f --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/LICENSE-APACHE @@ -0,0 +1,177 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + diff --git a/vendor/github.com/cloudwego/iasm/expr/ast.go b/vendor/github.com/cloudwego/iasm/expr/ast.go new file mode 100644 index 000000000..a91bb2e25 --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/expr/ast.go @@ -0,0 +1,261 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// + +package expr + +import ( + `fmt` +) + +// Type is the expression type. +type Type int + +const ( + // CONST indicates that the expression is a constant. + CONST Type = iota + + // TERM indicates that the expression is a Term reference. + TERM + + // EXPR indicates that the expression is a unary or binary expression. + EXPR +) + +var typeNames = map[Type]string { + EXPR : "Expr", + TERM : "Term", + CONST : "Const", +} + +// String returns the string representation of a Type. +func (self Type) String() string { + if v, ok := typeNames[self]; ok { + return v + } else { + return fmt.Sprintf("expr.Type(%d)", self) + } +} + +// Operator represents an operation to perform when Type is EXPR. +type Operator uint8 + +const ( + // ADD performs "Add Expr.Left and Expr.Right". + ADD Operator = iota + + // SUB performs "Subtract Expr.Left by Expr.Right". + SUB + + // MUL performs "Multiply Expr.Left by Expr.Right". + MUL + + // DIV performs "Divide Expr.Left by Expr.Right". + DIV + + // MOD performs "Modulo Expr.Left by Expr.Right". + MOD + + // AND performs "Bitwise AND Expr.Left and Expr.Right". + AND + + // OR performs "Bitwise OR Expr.Left and Expr.Right". + OR + + // XOR performs "Bitwise XOR Expr.Left and Expr.Right". + XOR + + // SHL performs "Bitwise Shift Expr.Left to the Left by Expr.Right Bits". + SHL + + // SHR performs "Bitwise Shift Expr.Left to the Right by Expr.Right Bits". + SHR + + // POW performs "Raise Expr.Left to the power of Expr.Right". + POW + + // NOT performs "Bitwise Invert Expr.Left". + NOT + + // NEG performs "Negate Expr.Left". + NEG +) + +var operatorNames = map[Operator]string { + ADD : "Add", + SUB : "Subtract", + MUL : "Multiply", + DIV : "Divide", + MOD : "Modulo", + AND : "And", + OR : "Or", + XOR : "ExclusiveOr", + SHL : "ShiftLeft", + SHR : "ShiftRight", + POW : "Power", + NOT : "Invert", + NEG : "Negate", +} + +// String returns the string representation of an Operator. +func (self Operator) String() string { + if v, ok := operatorNames[self]; ok { + return v + } else { + return fmt.Sprintf("expr.Operator(%d)", self) + } +} + +// Expr represents an expression node. +type Expr struct { + Type Type + Term Term + Op Operator + Left *Expr + Right *Expr + Const int64 +} + +// Ref creates an expression from a Term. +func Ref(t Term) (p *Expr) { + p = newExpression() + p.Term = t + p.Type = TERM + return +} + +// Int creates an expression from an integer. +func Int(v int64) (p *Expr) { + p = newExpression() + p.Type = CONST + p.Const = v + return +} + +func (self *Expr) clear() { + if self.Term != nil { self.Term.Free() } + if self.Left != nil { self.Left.Free() } + if self.Right != nil { self.Right.Free() } +} + +// Free returns the Expr to the pool. +// Any operation performed after Free is undefined behavior. +func (self *Expr) Free() { + self.clear() + freeExpression(self) +} + +// Evaluate evaluates the expression into an integer. +// It also implements the Term interface.
+func (self *Expr) Evaluate() (int64, error) { + switch self.Type { + case EXPR : return self.eval() + case TERM : return self.Term.Evaluate() + case CONST : return self.Const, nil + default : panic("invalid expression type: " + self.Type.String()) + } +} + +/** Expression Combinator **/ + +func combine(a *Expr, op Operator, b *Expr) (r *Expr) { + r = newExpression() + r.Op = op + r.Type = EXPR + r.Left = a + r.Right = b + return +} + +func (self *Expr) Add(v *Expr) *Expr { return combine(self, ADD, v) } +func (self *Expr) Sub(v *Expr) *Expr { return combine(self, SUB, v) } +func (self *Expr) Mul(v *Expr) *Expr { return combine(self, MUL, v) } +func (self *Expr) Div(v *Expr) *Expr { return combine(self, DIV, v) } +func (self *Expr) Mod(v *Expr) *Expr { return combine(self, MOD, v) } +func (self *Expr) And(v *Expr) *Expr { return combine(self, AND, v) } +func (self *Expr) Or (v *Expr) *Expr { return combine(self, OR , v) } +func (self *Expr) Xor(v *Expr) *Expr { return combine(self, XOR, v) } +func (self *Expr) Shl(v *Expr) *Expr { return combine(self, SHL, v) } +func (self *Expr) Shr(v *Expr) *Expr { return combine(self, SHR, v) } +func (self *Expr) Pow(v *Expr) *Expr { return combine(self, POW, v) } +func (self *Expr) Not() *Expr { return combine(self, NOT, nil) } +func (self *Expr) Neg() *Expr { return combine(self, NEG, nil) } + +/** Expression Evaluator **/ + +var binaryEvaluators = [256]func(int64, int64) (int64, error) { + ADD: func(a, b int64) (int64, error) { return a + b, nil }, + SUB: func(a, b int64) (int64, error) { return a - b, nil }, + MUL: func(a, b int64) (int64, error) { return a * b, nil }, + DIV: idiv, + MOD: imod, + AND: func(a, b int64) (int64, error) { return a & b, nil }, + OR: func(a, b int64) (int64, error) { return a | b, nil }, + XOR: func(a, b int64) (int64, error) { return a ^ b, nil }, + SHL: func(a, b int64) (int64, error) { return a << b, nil }, + SHR: func(a, b int64) (int64, error) { return a >> b, nil }, + POW: ipow, +} + +func (self *Expr) eval() (int64, error) { + var lhs int64 + var rhs int64 + var err error + var vfn func(int64, int64) (int64, error) + + /* evaluate LHS */ + if lhs, err = self.Left.Evaluate(); err != nil { + return 0, err + } + + /* check for unary operators */ + switch self.Op { + case NOT: return self.unaryNot(lhs) + case NEG: return self.unaryNeg(lhs) + } + + /* check for operators */ + if vfn = binaryEvaluators[self.Op]; vfn == nil { + panic("invalid operator: " + self.Op.String()) + } + + /* must be a binary expression */ + if self.Right == nil { + panic("operator " + self.Op.String() + " is a binary operator") + } + + /* evaluate RHS, and call the operator */ + if rhs, err = self.Right.Evaluate(); err != nil { + return 0, err + } else { + return vfn(lhs, rhs) + } +} + +func (self *Expr) unaryNot(v int64) (int64, error) { + if self.Right == nil { + return ^v, nil + } else { + panic("operator Invert is a unary operator") + } +} + +func (self *Expr) unaryNeg(v int64) (int64, error) { + if self.Right == nil { + return -v, nil + } else { + panic("operator Negate is a unary operator") + } +} diff --git a/vendor/github.com/cloudwego/iasm/expr/errors.go b/vendor/github.com/cloudwego/iasm/expr/errors.go new file mode 100644 index 000000000..ece4cb8dd --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/expr/errors.go @@ -0,0 +1,53 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package expr + +import ( + `fmt` +) + +// SyntaxError represents a syntax error in the expression. +type SyntaxError struct { + Pos int + Reason string +} + +func newSyntaxError(pos int, reason string) *SyntaxError { + return &SyntaxError { + Pos : pos, + Reason : reason, + } +} + +func (self *SyntaxError) Error() string { + return fmt.Sprintf("Syntax error at position %d: %s", self.Pos, self.Reason) +} + +// RuntimeError is an error which may occur at run time. +type RuntimeError struct { + Reason string +} + +func newRuntimeError(reason string) *RuntimeError { + return &RuntimeError { + Reason: reason, + } +} + +func (self *RuntimeError) Error() string { + return "Runtime error: " + self.Reason +} diff --git a/vendor/github.com/cloudwego/iasm/expr/ops.go b/vendor/github.com/cloudwego/iasm/expr/ops.go new file mode 100644 index 000000000..7f168b902 --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/expr/ops.go @@ -0,0 +1,67 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package expr + +import ( + `fmt` +) + +func idiv(v int64, d int64) (int64, error) { + if d != 0 { + return v / d, nil + } else { + return 0, newRuntimeError("division by zero") + } +} + +func imod(v int64, d int64) (int64, error) { + if d != 0 { + return v % d, nil + } else { + return 0, newRuntimeError("division by zero") + } +} + +func ipow(v int64, e int64) (int64, error) { + mul := v + ret := int64(1) + + /* value must be 0 or positive */ + if v < 0 { + return 0, newRuntimeError(fmt.Sprintf("negative base value: %d", v)) + } + + /* exponent must be non-negative */ + if e < 0 { + return 0, newRuntimeError(fmt.Sprintf("negative exponent: %d", e)) + } + + /* fast power first round */ + if (e & 1) != 0 { + ret *= mul + } + + /* fast power remaining rounds */ + for e >>= 1; e != 0; e >>= 1 { + if mul *= mul; (e & 1) != 0 { + ret *= mul + } + } + + /* all done */ + return ret, nil +} diff --git a/vendor/github.com/cloudwego/iasm/expr/parser.go b/vendor/github.com/cloudwego/iasm/expr/parser.go new file mode 100644 index 000000000..1846a58a0 --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/expr/parser.go @@ -0,0 +1,329 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package expr + +import ( + `strconv` + `unicode` + `unsafe` +) + +type _TokenKind uint8 + +const ( + _T_end _TokenKind = iota + 1 + _T_int + _T_punc + _T_name +) + +const ( + _OP2 = 0x80 + _POW = _OP2 | '*' + _SHL = _OP2 | '<' + _SHR = _OP2 | '>' +) + +type _Slice struct { + p unsafe.Pointer + n int + c int +} + +type _Token struct { + pos int + ptr *rune + u64 uint64 + tag _TokenKind +} + +func (self _Token) str() (v string) { + return string(self.rbuf()) +} + +func (self _Token) rbuf() (v []rune) { + (*_Slice)(unsafe.Pointer(&v)).c = int(self.u64) + (*_Slice)(unsafe.Pointer(&v)).n = int(self.u64) + (*_Slice)(unsafe.Pointer(&v)).p = unsafe.Pointer(self.ptr) + return +} + +func tokenEnd(p int) _Token { + return _Token { + pos: p, + tag: _T_end, + } +} + +func tokenInt(p int, v uint64) _Token { + return _Token { + pos: p, + u64: v, + tag: _T_int, + } +} + +func tokenPunc(p int, v rune) _Token { + return _Token { + pos: p, + tag: _T_punc, + u64: uint64(v), + } +} + +func tokenName(p int, v []rune) _Token { + return _Token { + pos: p, + ptr: &v[0], + tag: _T_name, + u64: uint64(len(v)), + } +} + +// Repository represents a repository of Terms. +type Repository interface { + Get(name string) (Term, error) +} + +// Parser parses an expression string to its AST representation. +type Parser struct { + pos int + src []rune +} + +var binaryOps = [...]func(*Expr, *Expr) *Expr { + '+' : (*Expr).Add, + '-' : (*Expr).Sub, + '*' : (*Expr).Mul, + '/' : (*Expr).Div, + '%' : (*Expr).Mod, + '&' : (*Expr).And, + '^' : (*Expr).Xor, + '|' : (*Expr).Or, + _SHL : (*Expr).Shl, + _SHR : (*Expr).Shr, + _POW : (*Expr).Pow, +} + +var precedence = [...]map[int]bool { + {_SHL: true, _SHR: true}, + {'|' : true}, + {'^' : true}, + {'&' : true}, + {'+' : true, '-': true}, + {'*' : true, '/': true, '%': true}, + {_POW: true}, +} + +func (self *Parser) ch() rune { + return self.src[self.pos] +} + +func (self *Parser) eof() bool { + return self.pos >= len(self.src) +} + +func (self *Parser) rch() (v rune) { + v, self.pos = self.src[self.pos], self.pos + 1 + return +} + +func (self *Parser) hex(ss []rune) bool { + if len(ss) == 1 && ss[0] == '0' { + return unicode.ToLower(self.ch()) == 'x' + } else if len(ss) <= 1 || unicode.ToLower(ss[1]) != 'x' { + return unicode.IsDigit(self.ch()) + } else { + return ishexdigit(self.ch()) + } +} + +func (self *Parser) int(p int, ss []rune) (_Token, error) { + var err error + var val uint64 + + /* find all the digits */ + for !self.eof() && self.hex(ss) { + ss = append(ss, self.rch()) + } + + /* parse the value */ + if val, err = strconv.ParseUint(string(ss), 0, 64); err != nil { + return _Token{}, err + } else { + return tokenInt(p, val), nil + } +} + +func (self *Parser) name(p int, ss []rune) _Token { + for !self.eof() && isident(self.ch()) { ss = append(ss, self.rch()) } + return tokenName(p, ss) +} + +func (self *Parser) read(p int, ch rune) (_Token, error) { + if isdigit(ch) { + return self.int(p, []rune { ch }) + } else if isident0(ch) { + return self.name(p, []rune { ch }), nil + } else if isop2ch(ch) && !self.eof() && self.ch() == ch { + return tokenPunc(p, _OP2 | self.rch()), nil + } else if isop1ch(ch) { + return tokenPunc(p, ch), nil + } else { + return _Token{}, newSyntaxError(self.pos, "invalid character " + strconv.QuoteRuneToASCII(ch)) + } +} + +func (self *Parser) next() (_Token, error) { + for { + var p int + var c rune + + /* check for EOF */ + if self.eof() { + return tokenEnd(self.pos), nil + } + + /* read the next char */ + p = self.pos + c = self.rch() + + /* parse the token if not a space */ + if !unicode.IsSpace(c) { + return self.read(p, c) + } + } +} + +func (self *Parser) grab(tk _Token, repo Repository) (*Expr, error) { + if repo == nil { + return nil, newSyntaxError(tk.pos, "unresolved symbol: " + tk.str()) + } else if term, err := repo.Get(tk.str()); err != nil { + return nil, err + } else { + return Ref(term), nil + } +} + +func (self *Parser) nest(nest int, repo Repository) (*Expr, error) { + var err error + var ret *Expr + var ntk _Token + + /* evaluate the nested expression */ + if ret, err = self.expr(0, nest + 1, repo); err != nil { + return nil, err + } + + /* must be followed by a ')' */ + if ntk, err = self.next(); err != nil { + return nil, err + } else if ntk.tag != _T_punc || ntk.u64 != ')' { + return nil, newSyntaxError(ntk.pos, "')' expected") + } else { + return ret, nil + } +} + +func (self *Parser) unit(nest int, repo Repository) (*Expr, error) { + if tk, err := self.next(); err != nil { + return nil, err + } else if tk.tag == _T_int { + return Int(int64(tk.u64)), nil + } else if tk.tag == _T_name { + return self.grab(tk, repo) + } else if tk.tag == _T_punc && tk.u64 == '(' { + return self.nest(nest, repo) + } else if tk.tag == _T_punc && tk.u64 == '+' { + return self.unit(nest, repo) + } else if tk.tag == _T_punc && tk.u64 == '-' { + return neg2(self.unit(nest, repo)) + } else if tk.tag == _T_punc && tk.u64 == '~' { + return not2(self.unit(nest, repo)) + } else { + return nil, newSyntaxError(tk.pos, "integer, unary operator or nested expression expected") + } +} + +func (self *Parser) term(prec int, nest int, repo Repository) (*Expr, error) { + var err error + var val *Expr + + /* parse the LHS operand */ + if val, err = self.expr(prec + 1, nest, repo); err != nil { + return nil, err + } + + /* parse all the operators of the same precedence */ + for { + var op int + var rv *Expr + var tk _Token + + /* peek the next token */ + pp := self.pos + tk, err = self.next() + + /* check for errors */ + if err != nil { + return nil, err + } + + /* encountered EOF */ + if tk.tag == _T_end { + return val, nil + } + + /* must be an operator */ + if tk.tag != _T_punc { + return nil, newSyntaxError(tk.pos, "operator expected") + } + + /* check for the operator precedence */ + if op = int(tk.u64); !precedence[prec][op] { + self.pos = pp + return val, nil + } + + /* evaluate the RHS operand, and combine the value */ + if rv, err = self.expr(prec + 1, nest, repo); err != nil { + return nil, err + } else { + val = binaryOps[op](val, rv) + } + } +} + +func (self *Parser) expr(prec int, nest int, repo Repository) (*Expr, error) { + if prec >= len(precedence) { + return self.unit(nest, repo) + } else { + return self.term(prec, nest, repo) + } +} + +// Parse parses the expression, and returns its AST tree. +func (self *Parser) Parse(repo Repository) (*Expr, error) { + return self.expr(0, 0, repo) +} + +// SetSource resets the expression parser and sets the expression source.
+func (self *Parser) SetSource(src string) *Parser { + self.pos = 0 + self.src = []rune(src) + return self +} diff --git a/vendor/github.com/cloudwego/iasm/expr/pools.go b/vendor/github.com/cloudwego/iasm/expr/pools.go new file mode 100644 index 000000000..869225242 --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/expr/pools.go @@ -0,0 +1,42 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package expr + +import ( + `sync` +) + +var ( + expressionPool sync.Pool +) + +func newExpression() *Expr { + if v := expressionPool.Get(); v == nil { + return new(Expr) + } else { + return resetExpression(v.(*Expr)) + } +} + +func freeExpression(p *Expr) { + expressionPool.Put(p) +} + +func resetExpression(p *Expr) *Expr { + *p = Expr{} + return p +} diff --git a/vendor/github.com/cloudwego/iasm/expr/term.go b/vendor/github.com/cloudwego/iasm/expr/term.go new file mode 100644 index 000000000..45042334b --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/expr/term.go @@ -0,0 +1,23 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package expr + +// Term represents a value that can Evaluate() into an integer. +type Term interface { + Free() + Evaluate() (int64, error) +} diff --git a/vendor/github.com/cloudwego/iasm/expr/utils.go b/vendor/github.com/cloudwego/iasm/expr/utils.go new file mode 100644 index 000000000..780f406e7 --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/expr/utils.go @@ -0,0 +1,77 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package expr + +var op1ch = [...]bool { + '+': true, + '-': true, + '*': true, + '/': true, + '%': true, + '&': true, + '|': true, + '^': true, + '~': true, + '(': true, + ')': true, +} + +var op2ch = [...]bool { + '*': true, + '<': true, + '>': true, +} + +func neg2(v *Expr, err error) (*Expr, error) { + if err != nil { + return nil, err + } else { + return v.Neg(), nil + } +} + +func not2(v *Expr, err error) (*Expr, error) { + if err != nil { + return nil, err + } else { + return v.Not(), nil + } +} + +func isop1ch(ch rune) bool { + return ch >= 0 && int(ch) < len(op1ch) && op1ch[ch] +} + +func isop2ch(ch rune) bool { + return ch >= 0 && int(ch) < len(op2ch) && op2ch[ch] +} + +func isdigit(ch rune) bool { + return ch >= '0' && ch <= '9' +} + +func isident(ch rune) bool { + return isdigit(ch) || isident0(ch) +} + +func isident0(ch rune) bool { + return (ch == '_') || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') +} + +func ishexdigit(ch rune) bool { + return isdigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F') +} diff --git a/vendor/github.com/cloudwego/iasm/x86_64/arch.go b/vendor/github.com/cloudwego/iasm/x86_64/arch.go new file mode 100644 index 000000000..26f1a539d --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/x86_64/arch.go @@ -0,0 +1,251 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package x86_64 + +import ( + `fmt` +) + +// ISA represents an extension to x86-64 instruction set. 
+type ISA uint64 + +const ( + ISA_CPUID ISA = 1 << iota + ISA_RDTSC + ISA_RDTSCP + ISA_CMOV + ISA_MOVBE + ISA_POPCNT + ISA_LZCNT + ISA_TBM + ISA_BMI + ISA_BMI2 + ISA_ADX + ISA_MMX + ISA_MMX_PLUS + ISA_FEMMS + ISA_3DNOW + ISA_3DNOW_PLUS + ISA_SSE + ISA_SSE2 + ISA_SSE3 + ISA_SSSE3 + ISA_SSE4A + ISA_SSE4_1 + ISA_SSE4_2 + ISA_FMA3 + ISA_FMA4 + ISA_XOP + ISA_F16C + ISA_AVX + ISA_AVX2 + ISA_AVX512F + ISA_AVX512BW + ISA_AVX512DQ + ISA_AVX512VL + ISA_AVX512PF + ISA_AVX512ER + ISA_AVX512CD + ISA_AVX512VBMI + ISA_AVX512IFMA + ISA_AVX512VPOPCNTDQ + ISA_AVX512_4VNNIW + ISA_AVX512_4FMAPS + ISA_PREFETCH + ISA_PREFETCHW + ISA_PREFETCHWT1 + ISA_CLFLUSH + ISA_CLFLUSHOPT + ISA_CLWB + ISA_CLZERO + ISA_RDRAND + ISA_RDSEED + ISA_PCLMULQDQ + ISA_AES + ISA_SHA + ISA_MONITOR + ISA_MONITORX + ISA_ALL = ^ISA(0) +) + +var _ISA_NAMES = map[ISA]string { + ISA_CPUID : "CPUID", + ISA_RDTSC : "RDTSC", + ISA_RDTSCP : "RDTSCP", + ISA_CMOV : "CMOV", + ISA_MOVBE : "MOVBE", + ISA_POPCNT : "POPCNT", + ISA_LZCNT : "LZCNT", + ISA_TBM : "TBM", + ISA_BMI : "BMI", + ISA_BMI2 : "BMI2", + ISA_ADX : "ADX", + ISA_MMX : "MMX", + ISA_MMX_PLUS : "MMX+", + ISA_FEMMS : "FEMMS", + ISA_3DNOW : "3dnow!", + ISA_3DNOW_PLUS : "3dnow!+", + ISA_SSE : "SSE", + ISA_SSE2 : "SSE2", + ISA_SSE3 : "SSE3", + ISA_SSSE3 : "SSSE3", + ISA_SSE4A : "SSE4A", + ISA_SSE4_1 : "SSE4.1", + ISA_SSE4_2 : "SSE4.2", + ISA_FMA3 : "FMA3", + ISA_FMA4 : "FMA4", + ISA_XOP : "XOP", + ISA_F16C : "F16C", + ISA_AVX : "AVX", + ISA_AVX2 : "AVX2", + ISA_AVX512F : "AVX512F", + ISA_AVX512BW : "AVX512BW", + ISA_AVX512DQ : "AVX512DQ", + ISA_AVX512VL : "AVX512VL", + ISA_AVX512PF : "AVX512PF", + ISA_AVX512ER : "AVX512ER", + ISA_AVX512CD : "AVX512CD", + ISA_AVX512VBMI : "AVX512VBMI", + ISA_AVX512IFMA : "AVX512IFMA", + ISA_AVX512VPOPCNTDQ : "AVX512VPOPCNTDQ", + ISA_AVX512_4VNNIW : "AVX512_4VNNIW", + ISA_AVX512_4FMAPS : "AVX512_4FMAPS", + ISA_PREFETCH : "PREFETCH", + ISA_PREFETCHW : "PREFETCHW", + ISA_PREFETCHWT1 : "PREFETCHWT1", + ISA_CLFLUSH : "CLFLUSH", + ISA_CLFLUSHOPT : "CLFLUSHOPT", + ISA_CLWB : "CLWB", + ISA_CLZERO : "CLZERO", + ISA_RDRAND : "RDRAND", + ISA_RDSEED : "RDSEED", + ISA_PCLMULQDQ : "PCLMULQDQ", + ISA_AES : "AES", + ISA_SHA : "SHA", + ISA_MONITOR : "MONITOR", + ISA_MONITORX : "MONITORX", +} + +var _ISA_MAPPING = map[string]ISA { + "CPUID" : ISA_CPUID, + "RDTSC" : ISA_RDTSC, + "RDTSCP" : ISA_RDTSCP, + "CMOV" : ISA_CMOV, + "MOVBE" : ISA_MOVBE, + "POPCNT" : ISA_POPCNT, + "LZCNT" : ISA_LZCNT, + "TBM" : ISA_TBM, + "BMI" : ISA_BMI, + "BMI2" : ISA_BMI2, + "ADX" : ISA_ADX, + "MMX" : ISA_MMX, + "MMX+" : ISA_MMX_PLUS, + "FEMMS" : ISA_FEMMS, + "3dnow!" 
: ISA_3DNOW, + "3dnow!+" : ISA_3DNOW_PLUS, + "SSE" : ISA_SSE, + "SSE2" : ISA_SSE2, + "SSE3" : ISA_SSE3, + "SSSE3" : ISA_SSSE3, + "SSE4A" : ISA_SSE4A, + "SSE4.1" : ISA_SSE4_1, + "SSE4.2" : ISA_SSE4_2, + "FMA3" : ISA_FMA3, + "FMA4" : ISA_FMA4, + "XOP" : ISA_XOP, + "F16C" : ISA_F16C, + "AVX" : ISA_AVX, + "AVX2" : ISA_AVX2, + "AVX512F" : ISA_AVX512F, + "AVX512BW" : ISA_AVX512BW, + "AVX512DQ" : ISA_AVX512DQ, + "AVX512VL" : ISA_AVX512VL, + "AVX512PF" : ISA_AVX512PF, + "AVX512ER" : ISA_AVX512ER, + "AVX512CD" : ISA_AVX512CD, + "AVX512VBMI" : ISA_AVX512VBMI, + "AVX512IFMA" : ISA_AVX512IFMA, + "AVX512VPOPCNTDQ" : ISA_AVX512VPOPCNTDQ, + "AVX512_4VNNIW" : ISA_AVX512_4VNNIW, + "AVX512_4FMAPS" : ISA_AVX512_4FMAPS, + "PREFETCH" : ISA_PREFETCH, + "PREFETCHW" : ISA_PREFETCHW, + "PREFETCHWT1" : ISA_PREFETCHWT1, + "CLFLUSH" : ISA_CLFLUSH, + "CLFLUSHOPT" : ISA_CLFLUSHOPT, + "CLWB" : ISA_CLWB, + "CLZERO" : ISA_CLZERO, + "RDRAND" : ISA_RDRAND, + "RDSEED" : ISA_RDSEED, + "PCLMULQDQ" : ISA_PCLMULQDQ, + "AES" : ISA_AES, + "SHA" : ISA_SHA, + "MONITOR" : ISA_MONITOR, + "MONITORX" : ISA_MONITORX, +} + +func (self ISA) String() string { + if v, ok := _ISA_NAMES[self]; ok { + return v + } else { + return fmt.Sprintf("(invalid: %#x)", uint64(self)) + } +} + +// ParseISA parses name into ISA, it will panic if the name is invalid. +func ParseISA(name string) ISA { + if v, ok := _ISA_MAPPING[name]; ok { + return v + } else { + panic("invalid ISA name: " + name) + } +} + +// Arch represents the x86_64 architecture. +type Arch struct { + isa ISA +} + +// DefaultArch is the default architecture with all ISA enabled. +var DefaultArch = CreateArch() + +// CreateArch creates a new Arch with all ISA enabled. +func CreateArch() *Arch { + return new(Arch).EnableISA(ISA_ALL) +} + +// HasISA checks if a particular ISA was enabled. +func (self *Arch) HasISA(isa ISA) bool { + return (self.isa & isa) != 0 +} + +// EnableISA enables a particular ISA. +func (self *Arch) EnableISA(isa ISA) *Arch { + self.isa |= isa + return self +} + +// DisableISA disables a particular ISA. +func (self *Arch) DisableISA(isa ISA) *Arch { + self.isa &^= isa + return self +} + +// CreateProgram creates a new empty program. +func (self *Arch) CreateProgram() *Program { + return newProgram(self) +} diff --git a/vendor/github.com/cloudwego/iasm/x86_64/asm.s b/vendor/github.com/cloudwego/iasm/x86_64/asm.s new file mode 100644 index 000000000..a00c41dfc --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/x86_64/asm.s @@ -0,0 +1,16 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + diff --git a/vendor/github.com/cloudwego/iasm/x86_64/assembler.go b/vendor/github.com/cloudwego/iasm/x86_64/assembler.go new file mode 100644 index 000000000..bbe19193a --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/x86_64/assembler.go @@ -0,0 +1,1819 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package x86_64 + +import ( + `bytes` + `errors` + `fmt` + `math` + `strconv` + `strings` + `unicode` + + `github.com/cloudwego/iasm/expr` +) + +type ( + _TokenKind int + _Punctuation int +) + +const ( + _T_end _TokenKind = iota + 1 + _T_int + _T_name + _T_punc + _T_space +) + +const ( + _P_plus _Punctuation = iota + 1 + _P_minus + _P_star + _P_slash + _P_percent + _P_amp + _P_bar + _P_caret + _P_shl + _P_shr + _P_tilde + _P_lbrk + _P_rbrk + _P_dot + _P_comma + _P_colon + _P_dollar + _P_hash +) + +var _PUNC_NAME = map[_Punctuation]string { + _P_plus : "+", + _P_minus : "-", + _P_star : "*", + _P_slash : "/", + _P_percent : "%", + _P_amp : "&", + _P_bar : "|", + _P_caret : "^", + _P_shl : "<<", + _P_shr : ">>", + _P_tilde : "~", + _P_lbrk : "(", + _P_rbrk : ")", + _P_dot : ".", + _P_comma : ",", + _P_colon : ":", + _P_dollar : "$", + _P_hash : "#", +} + +func (self _Punctuation) String() string { + if v, ok := _PUNC_NAME[self]; ok { + return v + } else { + return fmt.Sprintf("_Punctuation(%d)", self) + } +} + +type _Token struct { + pos int + end int + u64 uint64 + str string + tag _TokenKind +} + +func (self *_Token) punc() _Punctuation { + return _Punctuation(self.u64) +} + +func (self *_Token) String() string { + switch self.tag { + case _T_end : return "<END>" + case _T_int : return fmt.Sprintf("<INT %d>", self.u64) + case _T_punc : return fmt.Sprintf("<PUNC %s>", _Punctuation(self.u64)) + case _T_name : return fmt.Sprintf("<NAME %s>", strconv.QuoteToASCII(self.str)) + case _T_space : return "<SPACE>" + default : return fmt.Sprintf("<UNK:%d %d %s>", self.tag, self.u64, strconv.QuoteToASCII(self.str)) + } +} + +func tokenEnd(p int, end int) _Token { + return _Token { + pos: p, + end: end, + tag: _T_end, + } +} + +func tokenInt(p int, val uint64) _Token { + return _Token { + pos: p, + u64: val, + tag: _T_int, + } +} + +func tokenName(p int, name string) _Token { + return _Token { + pos: p, + str: name, + tag: _T_name, + } +} + +func tokenPunc(p int, punc _Punctuation) _Token { + return _Token { + pos: p, + tag: _T_punc, + u64: uint64(punc), + } +} + +func tokenSpace(p int, end int) _Token { + return _Token { + pos: p, + end: end, + tag: _T_space, + } +} + +// SyntaxError represents an error in the assembly syntax. +type SyntaxError struct { + Pos int + Row int + Src []rune + Reason string +} + +// Error implements the error interface. 
+func (self *SyntaxError) Error() string { + if self.Pos < 0 { + return fmt.Sprintf("%s at line %d", self.Reason, self.Row) + } else { + return fmt.Sprintf("%s at %d:%d", self.Reason, self.Row, self.Pos + 1) + } +} + +type _Tokenizer struct { + pos int + row int + src []rune +} + +func (self *_Tokenizer) ch() rune { + return self.src[self.pos] +} + +func (self *_Tokenizer) eof() bool { + return self.pos >= len(self.src) +} + +func (self *_Tokenizer) rch() (ret rune) { + ret, self.pos = self.src[self.pos], self.pos + 1 + return +} + +func (self *_Tokenizer) err(pos int, msg string) *SyntaxError { + return &SyntaxError { + Pos : pos, + Row : self.row, + Src : self.src, + Reason : msg, + } +} + +type _TrimState int + +const ( + _TS_normal _TrimState = iota + _TS_slcomm + _TS_hscomm + _TS_string + _TS_escape + _TS_accept + _TS_nolast +) + +func (self *_Tokenizer) init(src string) { + var i int + var ch rune + var st _TrimState + + /* set the source */ + self.pos = 0 + self.src = []rune(src) + + /* remove comments, including "//" and "##" */ + loop: for i, ch = range self.src { + switch { + case st == _TS_normal && ch == '/' : st = _TS_slcomm + case st == _TS_normal && ch == '"' : st = _TS_string + case st == _TS_normal && ch == ';' : st = _TS_accept; break loop + case st == _TS_normal && ch == '#' : st = _TS_hscomm + case st == _TS_slcomm && ch == '/' : st = _TS_nolast; break loop + case st == _TS_slcomm : st = _TS_normal + case st == _TS_hscomm && ch == '#' : st = _TS_nolast; break loop + case st == _TS_hscomm : st = _TS_normal + case st == _TS_string && ch == '"' : st = _TS_normal + case st == _TS_string && ch == '\\' : st = _TS_escape + case st == _TS_escape : st = _TS_string + } + } + + /* check for errors */ + switch st { + case _TS_accept: self.src = self.src[:i] + case _TS_nolast: self.src = self.src[:i - 1] + case _TS_string: panic(self.err(i, "string is not terminated")) + case _TS_escape: panic(self.err(i, "escape sequence is not terminated")) + } +} + +func (self *_Tokenizer) skip(check func(v rune) bool) { + for !self.eof() && check(self.ch()) { + self.pos++ + } +} + +func (self *_Tokenizer) find(pos int, check func(v rune) bool) string { + self.skip(check) + return string(self.src[pos:self.pos]) +} + +func (self *_Tokenizer) chrv(p int) _Token { + var err error + var val uint64 + + /* starting and ending position */ + p0 := p + 1 + p1 := p0 + 1 + + /* find the end of the literal */ + for p1 < len(self.src) && self.src[p1] != '\'' { + if p1++; self.src[p1 - 1] == '\\' { + p1++ + } + } + + /* empty literal */ + if p1 == p0 { + panic(self.err(p1, "empty character constant")) + } + + /* check for EOF */ + if p1 == len(self.src) { + panic(self.err(p1, "unexpected EOF when scanning literals")) + } + + /* parse the literal */ + if val, err = literal64(string(self.src[p0:p1])); err != nil { + panic(self.err(p0, "cannot parse literal: " + err.Error())) + } + + /* skip the closing '\'' */ + self.pos = p1 + 1 + return tokenInt(p, val) +} + +func (self *_Tokenizer) numv(p int) _Token { + if val, err := strconv.ParseUint(self.find(p, isnumber), 0, 64); err != nil { + panic(self.err(p, "invalid immediate value: " + err.Error())) + } else { + return tokenInt(p, val) + } +} + +func (self *_Tokenizer) defv(p int, cc rune) _Token { + if isdigit(cc) { + return self.numv(p) + } else if isident0(cc) { + return tokenName(p, self.find(p, isident)) + } else { + panic(self.err(p, "invalid char: " + strconv.QuoteRune(cc))) + } +} + +func (self *_Tokenizer) rep2(p int, pp _Punctuation, cc rune) _Token { +
if self.eof() { + panic(self.err(self.pos, "unexpected EOF when scanning operators")) + } else if c := self.rch(); c != cc { + panic(self.err(p + 1, strconv.QuoteRune(cc) + " expected, got " + strconv.QuoteRune(c))) + } else { + return tokenPunc(p, pp) + } +} + +func (self *_Tokenizer) read() _Token { + var p int + var c rune + var t _Token + + /* check for EOF */ + if self.eof() { + return tokenEnd(self.pos, self.pos) + } + + /* skip spaces as needed */ + if p = self.pos; unicode.IsSpace(self.src[p]) { + self.skip(unicode.IsSpace) + return tokenSpace(p, self.pos) + } + + /* check for line comments */ + if p = self.pos; p < len(self.src) - 1 && self.src[p] == '/' && self.src[p + 1] == '/' { + self.pos = len(self.src) + return tokenEnd(p, self.pos) + } + + /* read the next character */ + p = self.pos + c = self.rch() + + /* parse the next character */ + switch c { + case '+' : t = tokenPunc(p, _P_plus) + case '-' : t = tokenPunc(p, _P_minus) + case '*' : t = tokenPunc(p, _P_star) + case '/' : t = tokenPunc(p, _P_slash) + case '%' : t = tokenPunc(p, _P_percent) + case '&' : t = tokenPunc(p, _P_amp) + case '|' : t = tokenPunc(p, _P_bar) + case '^' : t = tokenPunc(p, _P_caret) + case '<' : t = self.rep2(p, _P_shl, '<') + case '>' : t = self.rep2(p, _P_shr, '>') + case '~' : t = tokenPunc(p, _P_tilde) + case '(' : t = tokenPunc(p, _P_lbrk) + case ')' : t = tokenPunc(p, _P_rbrk) + case '.' : t = tokenPunc(p, _P_dot) + case ',' : t = tokenPunc(p, _P_comma) + case ':' : t = tokenPunc(p, _P_colon) + case '$' : t = tokenPunc(p, _P_dollar) + case '#' : t = tokenPunc(p, _P_hash) + case '\'' : t = self.chrv(p) + default : t = self.defv(p, c) + } + + /* mark the end of token */ + t.end = self.pos + return t +} + +func (self *_Tokenizer) next() (tk _Token) { + for { + if tk = self.read(); tk.tag != _T_space { + return + } + } +} + +// LabelKind indicates the type of label reference. +type LabelKind int + +// OperandKind indicates the type of the operand. +type OperandKind int + +// InstructionPrefix indicates the prefix bytes prepended to the instruction. +type InstructionPrefix byte + +const ( + // OpImm means the operand is an immediate value. + OpImm OperandKind = 1 << iota + + // OpReg means the operand is a register. + OpReg + + // OpMem means the operand is a memory address. + OpMem + + // OpLabel means the operand is a label, specifically for + // branch instructions. + OpLabel +) + +const ( + // Declaration means the label is a declaration. + Declaration LabelKind = iota + 1 + + // BranchTarget means the label should be treated as a branch target. + BranchTarget + + // RelativeAddress means the label should be treated as a reference to + // the code section (e.g. RIP-relative addressing). + RelativeAddress +) + +const ( + // PrefixLock causes the processor's LOCK# signal to be asserted during execution of + // the accompanying instruction (turns the instruction into an atomic instruction). + // In a multiprocessor environment, the LOCK# signal insures that the processor + // has exclusive use of any shared memory while the signal is asserted. + PrefixLock InstructionPrefix = iota + + // PrefixSegmentCS overrides the memory operation of this instruction to CS (Code Segment). + PrefixSegmentCS + + // PrefixSegmentDS overrides the memory operation of this instruction to DS (Data Segment), + // this is the default section for most instructions if not specified. + PrefixSegmentDS + + // PrefixSegmentES overrides the memory operation of this instruction to ES (Extra Segment). 
+ PrefixSegmentES + + // PrefixSegmentFS overrides the memory operation of this instruction to FS. + PrefixSegmentFS + + // PrefixSegmentGS overrides the memory operation of this instruction to GS. + PrefixSegmentGS + + // PrefixSegmentSS overrides the memory operation of this instruction to SS (Stack Segment). + PrefixSegmentSS +) + +// ParsedLabel represents a label in the source, either a jump target or +// an RIP-relative addressing. +type ParsedLabel struct { + Name string + Kind LabelKind +} + +// ParsedOperand represents an operand of an instruction in the source. +type ParsedOperand struct { + Op OperandKind + Imm int64 + Reg Register + Label ParsedLabel + Memory MemoryAddress +} + +// ParsedInstruction represents an instruction in the source. +type ParsedInstruction struct { + Mnemonic string + Operands []ParsedOperand + Prefixes []InstructionPrefix +} + +func (self *ParsedInstruction) imm(v int64) { + self.Operands = append(self.Operands, ParsedOperand { + Op : OpImm, + Imm : v, + }) +} + +func (self *ParsedInstruction) reg(v Register) { + self.Operands = append(self.Operands, ParsedOperand { + Op : OpReg, + Reg : v, + }) +} + +func (self *ParsedInstruction) mem(v MemoryAddress) { + self.Operands = append(self.Operands, ParsedOperand { + Op : OpMem, + Memory : v, + }) +} + +func (self *ParsedInstruction) target(v string) { + self.Operands = append(self.Operands, ParsedOperand { + Op : OpLabel, + Label : ParsedLabel { + Name: v, + Kind: BranchTarget, + }, + }) +} + +func (self *ParsedInstruction) reference(v string) { + self.Operands = append(self.Operands, ParsedOperand { + Op : OpLabel, + Label : ParsedLabel { + Name: v, + Kind: RelativeAddress, + }, + }) +} + +// LineKind indicates the type of ParsedLine. +type LineKind int + +const ( + // LineLabel means the ParsedLine is a label. + LineLabel LineKind = iota + 1 + + // LineInstr means the ParsedLine is an instruction. + LineInstr + + // LineCommand means the ParsedLine is a ParsedCommand. + LineCommand +) + +// ParsedLine represents a parsed source line. +type ParsedLine struct { + Row int + Src []rune + Kind LineKind + Label ParsedLabel + Command ParsedCommand + Instruction ParsedInstruction +} + +// ParsedCommand represents a parsed assembly directive command. +type ParsedCommand struct { + Cmd string + Args []ParsedCommandArg +} + +// ParsedCommandArg represents an argument of a ParsedCommand. +type ParsedCommandArg struct { + Value string + IsString bool +} + +// Parser parses the source, and generates a sequence of ParsedInstruction's. 
+type Parser struct { + lex _Tokenizer + exp expr.Parser +} + +const ( + rip Register64 = 0xff +) + +var _RegBranch = map[string]bool { + "jmp" : true, + "jmpq" : true, + "call" : true, + "callq" : true, +} + +var _SegPrefix = map[string]InstructionPrefix { + "cs": PrefixSegmentCS, + "ds": PrefixSegmentDS, + "es": PrefixSegmentES, + "fs": PrefixSegmentFS, + "gs": PrefixSegmentGS, + "ss": PrefixSegmentSS, +} + +func (self *Parser) i32(tk _Token, v int64) int32 { + if v >= math.MinInt32 && v <= math.MaxUint32 { + return int32(v) + } else { + panic(self.err(tk.pos, fmt.Sprintf("32-bit integer out of range: %d", v))) + } +} + +func (self *Parser) err(pos int, msg string) *SyntaxError { + return &SyntaxError { + Pos : pos, + Row : self.lex.row, + Src : self.lex.src, + Reason : msg, + } +} + +func (self *Parser) negv() int64 { + tk := self.lex.read() + tt := tk.tag + + /* must be an integer */ + if tt != _T_int { + panic(self.err(tk.pos, "integer expected after '-'")) + } else { + return -int64(tk.u64) + } +} + +func (self *Parser) eval(p int) (r int64) { + var e error + var v *expr.Expr + + /* searching start */ + n := 1 + q := p + 1 + + /* find the end of expression */ + for n > 0 && q < len(self.lex.src) { + switch self.lex.src[q] { + case '(' : q++; n++ + case ')' : q++; n-- + default : q++ + } + } + + /* check for EOF */ + if n != 0 { + panic(self.err(q, "unexpected EOF when parsing expressions")) + } + + /* parse the expression */ + if v, e = self.exp.SetSource(string(self.lex.src[p:q - 1])).Parse(nil); e != nil { + panic(self.err(p, "cannot evaluate expression: " + e.Error())) + } + + /* evaluate the expression */ + if r, e = v.Evaluate(); e != nil { + panic(self.err(p, "cannot evaluate expression: " + e.Error())) + } + + /* skip the last ')' */ + v.Free() + self.lex.pos = q + return +} + +func (self *Parser) relx(tk _Token) { + if tk.tag != _T_punc || tk.punc() != _P_lbrk { + panic(self.err(tk.pos, "'(' expected for RIP-relative addressing")) + } else if tk = self.lex.next(); self.regx(tk) != rip { + panic(self.err(tk.pos, "RIP-relative addressing expects %rip as the base register")) + } else if tk = self.lex.next(); tk.tag != _T_punc || tk.punc() != _P_rbrk { + panic(self.err(tk.pos, "RIP-relative addressing does not support indexing or scaling")) + } +} + +func (self *Parser) immx(tk _Token) int64 { + if tk.tag != _T_punc || tk.punc() != _P_dollar { + panic(self.err(tk.pos, "'$' expected for immediate values")) + } else if tk = self.lex.read(); tk.tag == _T_int { + return int64(tk.u64) + } else if tk.tag == _T_punc && tk.punc() == _P_lbrk { + return self.eval(self.lex.pos) + } else if tk.tag == _T_punc && tk.punc() == _P_minus { + return self.negv() + } else { + panic(self.err(tk.pos, "immediate value expected")) + } +} + +func (self *Parser) regx(tk _Token) Register { + if tk.tag != _T_punc || tk.punc() != _P_percent { + panic(self.err(tk.pos, "'%' expected for registers")) + } else if tk = self.lex.read(); tk.tag != _T_name { + panic(self.err(tk.pos, "register name expected")) + } else if tk.str == "rip" { + return rip + } else if reg, ok := Registers[tk.str]; ok { + return reg + } else { + panic(self.err(tk.pos, "invalid register name: " + strconv.Quote(tk.str))) + } +} + +func (self *Parser) regv(tk _Token) Register { + if reg := self.regx(tk); reg == rip { + panic(self.err(tk.pos, "%rip is not accessible as a dedicated register")) + } else { + return reg + } +} + +func (self *Parser) disp(vv int32) MemoryAddress { + switch tk := self.lex.next(); tk.tag { + case _T_end : return
MemoryAddress { Displacement: vv } + case _T_punc : return self.relm(tk, vv) + default : panic(self.err(tk.pos, "',' or '(' expected")) + } +} + +func (self *Parser) relm(tv _Token, disp int32) MemoryAddress { + var tk _Token + var tt _TokenKind + + /* check for absolute addressing */ + if tv.punc() == _P_comma { + self.lex.pos-- + return MemoryAddress { Displacement: disp } + } + + /* must be '(' now */ + if tv.punc() != _P_lbrk { + panic(self.err(tv.pos, "',' or '(' expected")) + } + + /* read the next token */ + tk = self.lex.next() + tt = tk.tag + + /* must be a punctuation */ + if tt != _T_punc { + panic(self.err(tk.pos, "'%' or ',' expected")) + } + + /* check for base */ + switch tk.punc() { + case _P_percent : return self.base(tk, disp) + case _P_comma : return self.index(nil, disp) + default : panic(self.err(tk.pos, "'%' or ',' expected")) + } +} + +func (self *Parser) base(tk _Token, disp int32) MemoryAddress { + rr := self.regx(tk) + nk := self.lex.next() + + /* check for register indirection or base-index addressing */ + if !isReg64(rr) { + panic(self.err(tk.pos, "not a valid base register")) + } else if nk.tag != _T_punc { + panic(self.err(nk.pos, "',' or ')' expected")) + } else if nk.punc() == _P_comma { + return self.index(rr, disp) + } else if nk.punc() == _P_rbrk { + return MemoryAddress { Base: rr, Displacement: disp } + } else { + panic(self.err(nk.pos, "',' or ')' expected")) + } +} + +func (self *Parser) index(base Register, disp int32) MemoryAddress { + tk := self.lex.next() + rr := self.regx(tk) + nk := self.lex.next() + + /* check for scaled indexing */ + if base == rip { + panic(self.err(tk.pos, "RIP-relative addressing does not support indexing or scaling")) + } else if !isIndexable(rr) { + panic(self.err(tk.pos, "not a valid index register")) + } else if nk.tag != _T_punc { + panic(self.err(nk.pos, "',' or ')' expected")) + } else if nk.punc() == _P_comma { + return self.scale(base, rr, disp) + } else if nk.punc() == _P_rbrk { + return MemoryAddress { Base: base, Index: rr, Scale: 1, Displacement: disp } + } else { + panic(self.err(nk.pos, "',' or ')' expected")) + } +} + +func (self *Parser) scale(base Register, index Register, disp int32) MemoryAddress { + tk := self.lex.next() + tt := tk.tag + tv := tk.u64 + + /* must be an integer */ + if tt != _T_int { + panic(self.err(tk.pos, "integer expected")) + } + + /* scale can only be 1, 2, 4 or 8 */ + if tv == 0 || (_Scales & (1 << tv)) == 0 { + panic(self.err(tk.pos, "scale can only be 1, 2, 4 or 8")) + } + + /* read next token */ + tk = self.lex.next() + tt = tk.tag + + /* check for the closing ')' */ + if tt != _T_punc || tk.punc() != _P_rbrk { + panic(self.err(tk.pos, "')' expected")) + } + + /* construct the memory address */ + return MemoryAddress { + Base : base, + Index : index, + Scale : uint8(tv), + Displacement : disp, + } +} + +func (self *Parser) cmds() *ParsedLine { + cmd := "" + pos := self.lex.pos + buf := []ParsedCommandArg(nil) + + /* find the end of command */ + for p := pos; pos < len(self.lex.src); pos++ { + if unicode.IsSpace(self.lex.src[pos]) { + cmd = string(self.lex.src[p:pos]) + break + } + } + + /* parse the arguments */ + loop: for { + switch self.next(&pos) { + case 0 : break loop + case '#' : break loop + case '"' : pos = self.strings(&buf, pos) + default : pos = self.expressions(&buf, pos) + } + } + + /* construct the line */ + return &ParsedLine { + Row : self.lex.row, + Src : self.lex.src, + Kind : LineCommand, + Command : ParsedCommand { + Cmd : cmd, + Args : buf, + }, + } +} + 
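The disp/relm/base/index/scale chain above implements the AT&T memory-operand grammar displacement(base, index, scale), funneling every form into a MemoryAddress. As a hedged illustration of what it produces, the operand 8(%rax,%rbx,4) parses to the value sketched below; RAX and RBX are Register values from this package's register definitions, which are part of this diff but outside this excerpt:

    package main

    import "github.com/cloudwego/iasm/x86_64"

    func main() {
        // Equivalent of parsing the AT&T operand "8(%rax,%rbx,4)".
        addr := x86_64.MemoryAddress{
            Base:         x86_64.RAX, // base register, validated by base()
            Index:        x86_64.RBX, // index register, validated by index()
            Scale:        4,          // scale() accepts only 1, 2, 4 or 8
            Displacement: 8,          // leading integer, routed through disp()
        }
        _ = addr
    }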
+func (self *Parser) feed(line string) *ParsedLine {
+ ff := true
+ rr := false
+ lk := false
+
+ /* reset the lexer */
+ self.lex.row++
+ self.lex.init(line)
+
+ /* parse the first token */
+ tk := self.lex.next()
+ tt := tk.tag
+
+ /* it is a directive if it starts with a dot */
+ if tk.tag == _T_punc && tk.punc() == _P_dot {
+ return self.cmds()
+ }
+
+ /* otherwise it could be labels or instructions */
+ if tt != _T_name {
+ panic(self.err(tk.pos, "identifier expected"))
+ }
+
+ /* peek the next token */
+ lex := self.lex
+ tkx := lex.next()
+
+ /* check for labels */
+ if tkx.tag == _T_punc && tkx.punc() == _P_colon {
+ tkx = lex.next()
+ ttx := tkx.tag
+
+ /* the line must end here */
+ if ttx != _T_end {
+ panic(self.err(tkx.pos, "garbage after label definition"))
+ }
+
+ /* construct the label */
+ return &ParsedLine {
+ Row : self.lex.row,
+ Src : self.lex.src,
+ Kind : LineLabel,
+ Label : ParsedLabel {
+ Kind: Declaration,
+ Name: tk.str,
+ },
+ }
+ }
+
+ /* special case for the "lock" prefix */
+ if tk.tag == _T_name && strings.ToLower(tk.str) == "lock" {
+ lk = true
+ tk = self.lex.next()
+
+ /* must be an instruction */
+ if tk.tag != _T_name {
+ panic(self.err(tk.pos, "identifier expected"))
+ }
+ }
+
+ /* set the line kind and mnemonic */
+ ret := &ParsedLine {
+ Row : self.lex.row,
+ Src : self.lex.src,
+ Kind : LineInstr,
+ Instruction : ParsedInstruction { Mnemonic: strings.ToLower(tk.str) },
+ }
+
+ /* add the LOCK prefix if present */
+ if lk {
+ ret.Instruction.Prefixes = append(ret.Instruction.Prefixes, PrefixLock)
+ }
+
+ /* parse all the operands */
+ for {
+ tk = self.lex.next()
+ tt = tk.tag
+
+ /* check for end of line */
+ if tt == _T_end {
+ break
+ }
+
+ /* expect a comma if not the first operand */
+ if !ff {
+ if tt == _T_punc && tk.punc() == _P_comma {
+ tk = self.lex.next()
+ } else {
+ panic(self.err(tk.pos, "',' expected"))
+ }
+ }
+
+ /* not the first operand anymore */
+ ff = false
+ tt = tk.tag
+
+ /* encountered an integer, must be a SIB memory address */
+ if tt == _T_int {
+ ret.Instruction.mem(self.disp(self.i32(tk, int64(tk.u64))))
+ continue
+ }
+
+ /* encountered an identifier, maybe an expression or a jump target, or a segment override prefix */
+ if tt == _T_name {
+ ts := tk.str
+ tp := self.lex.pos
+
+ /* if the next token is EOF or a comma, it's a jump target */
+ if tk = self.lex.next(); tk.tag == _T_end || (tk.tag == _T_punc && tk.punc() == _P_comma) {
+ self.lex.pos = tp
+ ret.Instruction.target(ts)
+ continue
+ }
+
+ /* if it is a colon, it's a segment override prefix, otherwise it must be an RIP-relative addressing operand */
+ if tk.tag != _T_punc || tk.punc() != _P_colon {
+ self.relx(tk)
+ ret.Instruction.reference(ts)
+ continue
+ }
+
+ /* lookup segment prefixes */
+ if p, ok := _SegPrefix[strings.ToLower(ts)]; !ok {
+ panic(self.err(tk.pos, "invalid segment name"))
+ } else {
+ ret.Instruction.Prefixes = append(ret.Instruction.Prefixes, p)
+ }
+
+ /* read the next token */
+ tk = self.lex.next()
+ tt = tk.tag
+
+ /* encountered an integer, must be a SIB memory address */
+ if tt == _T_int {
+ ret.Instruction.mem(self.disp(self.i32(tk, int64(tk.u64))))
+ continue
+ }
+ }
+
+ /* certain instructions may have a "*" before operands */
+ if tt == _T_punc && tk.punc() == _P_star {
+ tk = self.lex.next()
+ tt = tk.tag
+ rr = true
+ }
+
+ /* ... otherwise it must be a punctuation */
+ if tt != _T_punc {
+ panic(self.err(tk.pos, "'$', '%', '-' or '(' expected"))
+ }
+
+ /* check the operator */
+ switch tk.punc() {
+ case _P_lbrk : break
+ case _P_minus : ret.Instruction.mem(self.disp(self.i32(tk, self.negv()))) ; continue
+ case _P_dollar : ret.Instruction.imm(self.immx(tk)) ; continue
+ case _P_percent : ret.Instruction.reg(self.regv(tk)) ; continue
+ default : panic(self.err(tk.pos, "'$', '%', '-' or '(' expected"))
+ }
+
+ /* special case of '(', might be either `(expr)(SIB)` or just `(SIB)`
+ * read one more token to confirm */
+ tk = self.lex.next()
+ tt = tk.tag
+
+ /* the next token is '%', it's a memory address,
+ * or ',' if it's a memory address without base,
+ * otherwise it must be in `(expr)(SIB)` form */
+ if tk.tag == _T_punc && tk.punc() == _P_percent {
+ ret.Instruction.mem(self.base(tk, 0))
+ } else if tk.tag == _T_punc && tk.punc() == _P_comma {
+ ret.Instruction.mem(self.index(nil, 0))
+ } else {
+ ret.Instruction.mem(self.disp(self.i32(tk, self.eval(tk.pos))))
+ }
+ }
+
+ /* check "jmp" and "call" instructions */
+ if !_RegBranch[ret.Instruction.Mnemonic] {
+ return ret
+ } else if len(ret.Instruction.Operands) != 1 {
+ panic(self.err(tk.pos, fmt.Sprintf(`"%s" requires exactly 1 argument`, ret.Instruction.Mnemonic)))
+ } else if !rr && ret.Instruction.Operands[0].Op != OpReg && ret.Instruction.Operands[0].Op != OpLabel {
+ panic(self.err(tk.pos, fmt.Sprintf(`invalid operand for "%s" instruction`, ret.Instruction.Mnemonic)))
+ } else {
+ return ret
+ }
+}
+
+func (self *Parser) next(p *int) rune {
+ for {
+ if *p >= len(self.lex.src) {
+ return 0
+ } else if cc := self.lex.src[*p]; !unicode.IsSpace(cc) {
+ return cc
+ } else {
+ *p++
+ }
+ }
+}
+
+func (self *Parser) delim(p int) int {
+ if cc := self.next(&p); cc == 0 {
+ return p
+ } else if cc == ',' {
+ return p + 1
+ } else {
+ panic(self.err(p, "',' expected"))
+ }
+}
+
+func (self *Parser) strings(argv *[]ParsedCommandArg, p int) int {
+ var i int
+ var e error
+ var v string
+
+ /* find the end of string */
+ for i = p + 1; i < len(self.lex.src) && self.lex.src[i] != '"'; i++ {
+ if self.lex.src[i] == '\\' {
+ i++
+ }
+ }
+
+ /* check for EOF */
+ if i == len(self.lex.src) {
+ panic(self.err(i, "unexpected EOF when scanning strings"))
+ }
+
+ /* unquote the string */
+ if v, e = strconv.Unquote(string(self.lex.src[p:i + 1])); e != nil {
+ panic(self.err(p, "invalid string: " + e.Error()))
+ }
+
+ /* add the argument to buffer */
+ *argv = append(*argv, ParsedCommandArg { Value: v, IsString: true })
+ return self.delim(i + 1)
+}
+
+func (self *Parser) directives(line string) {
+ self.lex.row++
+ self.lex.init(line)
+
+ /* parse the first token */
+ tk := self.lex.next()
+ tt := tk.tag
+
+ /* check for EOF */
+ if tt == _T_end {
+ return
+ }
+
+ /* must be a directive */
+ if tt != _T_punc || tk.punc() != _P_hash {
+ panic(self.err(tk.pos, "'#' expected"))
+ }
+
+ /* parse the line number */
+ tk = self.lex.next()
+ tt = tk.tag
+
+ /* must be a line number; if so, set the row number and ignore the rest of the line */
+ if tt != _T_int {
+ panic(self.err(tk.pos, "line number expected"))
+ } else {
+ self.lex.row = int(tk.u64) - 1
+ }
+}
+
+func (self *Parser) expressions(argv *[]ParsedCommandArg, p int) int {
+ var i int
+ var n int
+ var s int
+
+ /* scan until the first standalone ',' or EOF */
+ loop: for i = p; i < len(self.lex.src); i++ {
+ switch self.lex.src[i] {
+ case ',' : if s == 0 { if n == 0 { break loop } }
+ case ']', '}', '>' : if s == 0 { if n == 0 { break loop } else { n-- } }
+ case '[', '{', '<' : if s == 0 { n++ }
+ case '\\' : if s != 0 { i++ }
+ case '\'' : if s != 2 { s ^= 1 }
+ case '"' : if s != 1 { s ^= 2 }
+ }
+ }
+
+ /* check for EOF in strings */
+ if s != 0 {
+ panic(self.err(i, "unexpected EOF when scanning strings"))
+ }
+
+ /* check for bracket matching */
+ if n != 0 {
+ panic(self.err(i, "unbalanced '{' or '[' or '<'"))
+ }
+
+ /* add the argument to buffer */
+ *argv = append(*argv, ParsedCommandArg { Value: string(self.lex.src[p:i]) })
+ return self.delim(i)
+}
+
+// Feed feeds the parser with one more line, and the parser
+// parses it into a ParsedLine.
+//
+// NOTE: Feed does not handle empty lines or multiple lines;
+// it returns an error when this happens. Use Parse to parse
+// multiple lines of assembly source.
+//
+func (self *Parser) Feed(src string) (ret *ParsedLine, err error) {
+ var ok bool
+ var ss string
+ var vv interface{}
+
+ /* check for multiple lines */
+ if strings.ContainsRune(src, '\n') {
+ return nil, errors.New("passing multiple lines to Feed()")
+ }
+
+ /* check for blank lines */
+ if ss = strings.TrimSpace(src); ss == "" || ss[0] == '#' || strings.HasPrefix(ss, "//") {
+ return nil, errors.New("blank line or line with only comments or line-marks")
+ }
+
+ /* setup error handler */
+ defer func() {
+ if vv = recover(); vv != nil {
+ if err, ok = vv.(*SyntaxError); !ok {
+ panic(vv)
+ }
+ }
+ }()
+
+ /* call the actual parser */
+ ret = self.feed(src)
+ return
+}
+
+// Parse parses the entire assembly source (possibly multiple lines) into
+// a sequence of *ParsedLine.
+func (self *Parser) Parse(src string) (ret []*ParsedLine, err error) {
+ var ok bool
+ var ss string
+ var vv interface{}
+
+ /* setup error handler */
+ defer func() {
+ if vv = recover(); vv != nil {
+ if err, ok = vv.(*SyntaxError); !ok {
+ panic(vv)
+ }
+ }
+ }()
+
+ /* feed every line */
+ for _, line := range strings.Split(src, "\n") {
+ if ss = strings.TrimSpace(line); ss == "" || strings.HasPrefix(ss, "//") {
+ self.lex.row++
+ } else if ss[0] == '#' {
+ self.directives(line)
+ } else {
+ ret = append(ret, self.feed(line))
+ }
+ }
+
+ /* all done */
+ err = nil
+ return
+}
+
+// Directive handles a line-marker directive.
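+// Line markers are emitted by preprocessors to track source locations; the
+// parser only consumes the leading line number and resets its row counter.
+// For example (the file name is illustrative):
+//
+//     # 42 "main.s"
+//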
+func (self *Parser) Directive(line string) (err error) {
+ var ok bool
+ var ss string
+ var vv interface{}
+
+ /* check for directives */
+ if ss = strings.TrimSpace(line); ss == "" || ss[0] != '#' {
+ return errors.New("not a directive")
+ }
+
+ /* setup error handler */
+ defer func() {
+ if vv = recover(); vv != nil {
+ if err, ok = vv.(*SyntaxError); !ok {
+ panic(vv)
+ }
+ }
+ }()
+
+ /* call the directive parser */
+ self.directives(line)
+ return
+}
+
+type _TermRepo struct {
+ terms map[string]expr.Term
+}
+
+func (self *_TermRepo) Get(name string) (expr.Term, error) {
+ if ret, ok := self.terms[name]; ok {
+ return ret, nil
+ } else {
+ return nil, errors.New("undefined name: " + name)
+ }
+}
+
+func (self *_TermRepo) label(name string) (*Label, error) {
+ var ok bool
+ var lb *Label
+ var tr expr.Term
+
+ /* check for existing terms */
+ if tr, ok = self.terms[name]; ok {
+ if lb, ok = tr.(*Label); ok {
+ return lb, nil
+ } else {
+ return nil, errors.New("name is not a label: " + name)
+ }
+ }
+
+ /* create a new one as needed */
+ lb = new(Label)
+ lb.Name = name
+
+ /* create the map if needed */
+ if self.terms == nil {
+ self.terms = make(map[string]expr.Term, 1)
+ }
+
+ /* register the label */
+ self.terms[name] = lb
+ return lb, nil
+}
+
+func (self *_TermRepo) define(name string, term expr.Term) {
+ var ok bool
+ var tr expr.Term
+
+ /* create the map if needed */
+ if self.terms == nil {
+ self.terms = make(map[string]expr.Term, 1)
+ }
+
+ /* check for existing terms */
+ if tr, ok = self.terms[name]; !ok {
+ self.terms[name] = term
+ } else if _, ok = tr.(*Label); !ok {
+ self.terms[name] = term
+ } else {
+ panic("conflicting term types: " + name)
+ }
+}
+
+// _Command describes an assembler command.
+//
+// The _Command.args field describes both the arity and the argument types:
+// its length is the number of arguments, and each character encodes the type
+// of one argument.
+//
+// Possible values are:
+//
+// s This argument should be a string
+// e This argument should be an expression
+// ? The next argument is optional, and must be the last argument.
+//
+type _Command struct {
+ args string
+ handler func(*Assembler, *Program, []ParsedCommandArg) error
+}
+
+// Options controls the behavior of Assembler.
+type Options struct {
+ // InstructionAliasing specifies whether to enable instruction aliasing.
+ // Set to true enables instruction aliasing, and the Assembler will try harder to find instructions.
+ InstructionAliasing bool
+
+ // IgnoreUnknownDirectives specifies whether to report errors when encountering unknown directives.
+ // Set to true ignores all unknown directives silently, useful for parsing generated assembly.
+ IgnoreUnknownDirectives bool
+}
+
+// Assembler assembles the entire assembly program and generates the corresponding
+// machine code representations.
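+//
+// A minimal usage sketch (error handling elided; the source text is
+// illustrative):
+//
+//     var asm Assembler
+//     _ = asm.Assemble("movq $1, %rax\nret\n")
+//     code := asm.Code()
+//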
+type Assembler struct {
+ cc int
+ ps Parser
+ pc uintptr
+ buf []byte
+ main string
+ opts Options
+ repo _TermRepo
+ expr expr.Parser
+ line *ParsedLine
+}
+
+var asmCommands = map[string]_Command {
+ "org" : { "e" , (*Assembler).assembleCommandOrg },
+ "set" : { "ee" , (*Assembler).assembleCommandSet },
+ "byte" : { "e" , (*Assembler).assembleCommandByte },
+ "word" : { "e" , (*Assembler).assembleCommandWord },
+ "long" : { "e" , (*Assembler).assembleCommandLong },
+ "quad" : { "e" , (*Assembler).assembleCommandQuad },
+ "fill" : { "e?e" , (*Assembler).assembleCommandFill },
+ "space" : { "e?e" , (*Assembler).assembleCommandFill },
+ "align" : { "e?e" , (*Assembler).assembleCommandAlign },
+ "entry" : { "e" , (*Assembler).assembleCommandEntry },
+ "ascii" : { "s" , (*Assembler).assembleCommandAscii },
+ "asciz" : { "s" , (*Assembler).assembleCommandAsciz },
+ "p2align" : { "e?e" , (*Assembler).assembleCommandP2Align },
+}
+
+func (self *Assembler) err(msg string) *SyntaxError {
+ return &SyntaxError {
+ Pos : -1,
+ Row : self.line.Row,
+ Src : self.line.Src,
+ Reason : msg,
+ }
+}
+
+func (self *Assembler) eval(expr string) (int64, error) {
+ if exp, err := self.expr.SetSource(expr).Parse(nil); err != nil {
+ return 0, err
+ } else {
+ return exp.Evaluate()
+ }
+}
+
+func (self *Assembler) checkArgs(i int, n int, v *ParsedCommand, isString bool) error {
+ if i >= len(v.Args) {
+ return self.err(fmt.Sprintf("command %s takes exactly %d arguments", strconv.Quote(v.Cmd), n))
+ } else if isString && !v.Args[i].IsString {
+ return self.err(fmt.Sprintf("argument %d of command %s must be a string", i + 1, strconv.Quote(v.Cmd)))
+ } else if !isString && v.Args[i].IsString {
+ return self.err(fmt.Sprintf("argument %d of command %s must be an expression", i + 1, strconv.Quote(v.Cmd)))
+ } else {
+ return nil
+ }
+}
+
+func (self *Assembler) assembleLabel(p *Program, lb *ParsedLabel) error {
+ if v, err := self.repo.label(lb.Name); err != nil {
+ return err
+ } else {
+ p.Link(v)
+ return nil
+ }
+}
+
+func (self *Assembler) assembleInstr(p *Program, line *ParsedInstruction) (err error) {
+ var ok bool
+ var pfx []byte
+ var ops []interface{}
+ var enc _InstructionEncoder
+
+ /* convert to lower-case */
+ opts := self.opts
+ name := strings.ToLower(line.Mnemonic)
+
+ /* fix register-addressing branches if needed */
+ if opts.InstructionAliasing && len(line.Operands) == 1 {
+ switch {
+ case name == "retq" : name = "ret"
+ case name == "movabsq" : name = "movq"
+ case name == "jmp" && line.Operands[0].Op != OpLabel : name = "jmpq"
+ case name == "jmpq" && line.Operands[0].Op == OpLabel : name = "jmp"
+ case name == "call" && line.Operands[0].Op != OpLabel : name = "callq"
+ case name == "callq" && line.Operands[0].Op == OpLabel : name = "call"
+ }
+ }
+
+ /* lookup from the alias table if needed */
+ if opts.InstructionAliasing {
+ enc, ok = _InstructionAliases[name]
+ }
+
+ /* lookup from the instruction table */
+ if !ok {
+ enc, ok = Instructions[name]
+ }
+
+ /* remove size suffix if possible */
+ if !ok && opts.InstructionAliasing {
+ switch i := len(name) - 1; name[i] {
+ case 'b', 'w', 'l', 'q': {
+ enc, ok = Instructions[name[:i]]
+ }
+ }
+ }
+
+ /* check for instruction name */
+ if !ok {
+ return self.err("no such instruction: " + strconv.Quote(name))
+ }
+
+ /* allocate memory for prefix if any */
+ if len(line.Prefixes) != 0 {
+ pfx = make([]byte, len(line.Prefixes))
+ }
+
+ /* convert the prefixes */
+ for i, v := range line.Prefixes {
+ switch v {
+ case PrefixLock : pfx[i] = _P_lock
+
case PrefixSegmentCS : pfx[i] = _P_cs + case PrefixSegmentDS : pfx[i] = _P_ds + case PrefixSegmentES : pfx[i] = _P_es + case PrefixSegmentFS : pfx[i] = _P_fs + case PrefixSegmentGS : pfx[i] = _P_gs + case PrefixSegmentSS : pfx[i] = _P_ss + default : panic("unreachable: invalid segment prefix") + } + } + + /* convert the operands */ + for _, op := range line.Operands { + switch op.Op { + case OpImm : ops = append(ops, op.Imm) + case OpReg : ops = append(ops, op.Reg) + case OpMem : self.assembleInstrMem(&ops, op.Memory) + case OpLabel : self.assembleInstrLabel(&ops, op.Label) + default : panic("parser yields an invalid operand kind") + } + } + + /* catch any exceptions in the encoder */ + defer func() { + if v := recover(); v != nil { + err = self.err(fmt.Sprint(v)) + } + }() + + /* encode the instruction */ + enc(p, ops...).prefix = pfx + return nil +} + +func (self *Assembler) assembleInstrMem(ops *[]interface{}, addr MemoryAddress) { + mem := new(MemoryOperand) + *ops = append(*ops, mem) + + /* check for RIP-relative addressing */ + if addr.Base != rip { + mem.Addr.Type = Memory + mem.Addr.Memory = addr + } else { + mem.Addr.Type = Offset + mem.Addr.Offset = RelativeOffset(addr.Displacement) + } +} + +func (self *Assembler) assembleInstrLabel(ops *[]interface{}, label ParsedLabel) { + vk := label.Kind + tr, err := self.repo.label(label.Name) + + /* check for errors */ + if err != nil { + panic(err) + } + + /* check for branch target */ + if vk == BranchTarget { + *ops = append(*ops, tr) + return + } + + /* add to ops */ + *ops = append(*ops, &MemoryOperand { + Addr: Addressable { + Type : Reference, + Reference : tr, + }, + }) +} + +func (self *Assembler) assembleCommand(p *Program, line *ParsedCommand) error { + var iv int + var cc rune + var ok bool + var va bool + var fn _Command + + /* find the command */ + if fn, ok = asmCommands[line.Cmd]; !ok { + if self.opts.IgnoreUnknownDirectives { + return nil + } else { + return self.err("no such command: " + strconv.Quote(line.Cmd)) + } + } + + /* expected & real argument count */ + argx := len(fn.args) + argc := len(line.Args) + + /* check the arguments */ + loop: for iv, cc = range fn.args { + switch cc { + case '?' 
: va = true; break loop
+ case 's' : if err := self.checkArgs(iv, argx, line, true) ; err != nil { return err }
+ case 'e' : if err := self.checkArgs(iv, argx, line, false) ; err != nil { return err }
+ default : panic("invalid argument descriptor: " + strconv.Quote(fn.args))
+ }
+ }
+
+ /* simple case: non-variadic command */
+ if !va {
+ if argc == argx {
+ return fn.handler(self, p, line.Args)
+ } else {
+ return self.err(fmt.Sprintf("command %s takes exactly %d arguments", strconv.Quote(line.Cmd), argx))
+ }
+ }
+
+ /* check for the descriptor */
+ if iv != argx - 2 {
+ panic("invalid argument descriptor: " + strconv.Quote(fn.args))
+ }
+
+ /* variadic command and the final optional argument is set */
+ if argc == argx - 1 {
+ switch fn.args[argx - 1] {
+ case 's' : if err := self.checkArgs(iv, -1, line, true) ; err != nil { return err }
+ case 'e' : if err := self.checkArgs(iv, -1, line, false) ; err != nil { return err }
+ default : panic("invalid argument descriptor: " + strconv.Quote(fn.args))
+ }
+ }
+
+ /* check argument count */
+ if argc == argx - 1 || argc == argx - 2 {
+ return fn.handler(self, p, line.Args)
+ } else {
+ return self.err(fmt.Sprintf("command %s takes %d or %d arguments", strconv.Quote(line.Cmd), argx - 2, argx - 1))
+ }
+}
+
+func (self *Assembler) assembleCommandInt(p *Program, argv []ParsedCommandArg, addfn func(*Program, *expr.Expr) *Instruction) error {
+ var err error
+ var val *expr.Expr
+
+ /* parse the expression */
+ if val, err = self.expr.SetSource(argv[0].Value).Parse(&self.repo); err != nil {
+ return err
+ }
+
+ /* add to the program */
+ addfn(p, val)
+ return nil
+}
+
+func (self *Assembler) assembleCommandOrg(_ *Program, argv []ParsedCommandArg) error {
+ var err error
+ var val int64
+
+ /* evaluate the expression */
+ if val, err = self.eval(argv[0].Value); err != nil {
+ return err
+ }
+
+ /* check for origin */
+ if val < 0 {
+ return self.err(fmt.Sprintf("negative origin: %d", val))
+ }
+
+ /* ".org" must be the first command if any */
+ if self.cc != 1 {
+ return self.err(".org must be the first command if present")
+ }
+
+ /* set the initial program counter */
+ self.pc = uintptr(val)
+ return nil
+}
+
+func (self *Assembler) assembleCommandSet(_ *Program, argv []ParsedCommandArg) error {
+ var err error
+ var val *expr.Expr
+
+ /* parse the expression */
+ if val, err = self.expr.SetSource(argv[1].Value).Parse(&self.repo); err != nil {
+ return err
+ }
+
+ /* define the new identifier */
+ self.repo.define(argv[0].Value, val)
+ return nil
+}
+
+func (self *Assembler) assembleCommandByte(p *Program, argv []ParsedCommandArg) error {
+ return self.assembleCommandInt(p, argv, (*Program).Byte)
+}
+
+func (self *Assembler) assembleCommandWord(p *Program, argv []ParsedCommandArg) error {
+ return self.assembleCommandInt(p, argv, (*Program).Word)
+}
+
+func (self *Assembler) assembleCommandLong(p *Program, argv []ParsedCommandArg) error {
+ return self.assembleCommandInt(p, argv, (*Program).Long)
+}
+
+func (self *Assembler) assembleCommandQuad(p *Program, argv []ParsedCommandArg) error {
+ return self.assembleCommandInt(p, argv, (*Program).Quad)
+}
+
+func (self *Assembler) assembleCommandFill(p *Program, argv []ParsedCommandArg) error {
+ var fv byte
+ var nb int64
+ var ex error
+
+ /* evaluate the size */
+ if nb, ex = self.eval(argv[0].Value); ex != nil {
+ return ex
+ }
+
+ /* check for filling size */
+ if nb < 0 {
+ return self.err(fmt.Sprintf("negative filling size: %d", nb))
+ }
+
+ /* check for optional filling value */
+ if len(argv) == 2 {
+ if val, err := self.eval(argv[1].Value); err != nil {
+ return err
+ } else if val < math.MinInt8 || val > math.MaxUint8 {
+ return self.err(fmt.Sprintf("value %d cannot be represented with a byte", val))
+ } else {
+ fv = byte(val)
+ }
+ }
+
+ /* fill with specified byte */
+ p.Data(bytes.Repeat([]byte { fv }, int(nb)))
+ return nil
+}
+
+func (self *Assembler) assembleCommandAlign(p *Program, argv []ParsedCommandArg) error {
+ var nb int64
+ var ex error
+ var fv *expr.Expr
+
+ /* evaluate the size */
+ if nb, ex = self.eval(argv[0].Value); ex != nil {
+ return ex
+ }
+
+ /* check for alignment value */
+ if nb <= 0 {
+ return self.err(fmt.Sprintf("zero or negative alignment: %d", nb))
+ }
+
+ /* alignment must be a power of 2 */
+ if (nb & (nb - 1)) != 0 {
+ return self.err(fmt.Sprintf("alignment must be a power of 2: %d", nb))
+ }
+
+ /* check for optional filling value */
+ if len(argv) == 2 {
+ if v, err := self.expr.SetSource(argv[1].Value).Parse(&self.repo); err == nil {
+ fv = v
+ } else {
+ return err
+ }
+ }
+
+ /* fill with specified byte, default to 0 if not specified */
+ p.Align(uint64(nb), fv)
+ return nil
+}
+
+func (self *Assembler) assembleCommandEntry(_ *Program, argv []ParsedCommandArg) error {
+ name := argv[0].Value
+ rbuf := []rune(name)
+
+ /* check all the characters */
+ for i, cc := range rbuf {
+ if !isident0(cc) && (i == 0 || !isident(cc)) {
+ return self.err("entry point must be a label name")
+ }
+ }
+
+ /* set the main entry point */
+ self.main = name
+ return nil
+}
+
+func (self *Assembler) assembleCommandAscii(p *Program, argv []ParsedCommandArg) error {
+ p.Data([]byte(argv[0].Value))
+ return nil
+}
+
+func (self *Assembler) assembleCommandAsciz(p *Program, argv []ParsedCommandArg) error {
+ p.Data(append([]byte(argv[0].Value), 0))
+ return nil
+}
+
+func (self *Assembler) assembleCommandP2Align(p *Program, argv []ParsedCommandArg) error {
+ var nb int64
+ var ex error
+ var fv *expr.Expr
+
+ /* evaluate the size */
+ if nb, ex = self.eval(argv[0].Value); ex != nil {
+ return ex
+ }
+
+ /* check for alignment value */
+ if nb <= 0 {
+ return self.err(fmt.Sprintf("zero or negative alignment: %d", nb))
+ }
+
+ /* check for optional filling value */
+ if len(argv) == 2 {
+ if v, err := self.expr.SetSource(argv[1].Value).Parse(&self.repo); err == nil {
+ fv = v
+ } else {
+ return err
+ }
+ }
+
+ /* fill with specified byte, default to 0 if not specified */
+ p.Align(1 << nb, fv)
+ return nil
+}
+
+// Base returns the origin.
+func (self *Assembler) Base() uintptr {
+ return self.pc
+}
+
+// Code returns the assembled machine code.
+func (self *Assembler) Code() []byte {
+ return self.buf
+}
+
+// Entry returns the address of the specified entry point, or the origin if not specified.
+func (self *Assembler) Entry() uintptr {
+ if self.main == "" {
+ return self.pc
+ } else if tr, err := self.repo.Get(self.main); err != nil {
+ panic(err)
+ } else if val, err := tr.Evaluate(); err != nil {
+ panic(err)
+ } else {
+ return uintptr(val)
+ }
+}
+
+// Options returns a reference to the internal options; changing it WILL affect this Assembler instance.
+func (self *Assembler) Options() *Options {
+ return &self.opts
+}
+
+// WithBase resets the origin to pc.
+func (self *Assembler) WithBase(pc uintptr) *Assembler {
+ self.pc = pc
+ return self
+}
+
+// Assemble assembles the assembly source and saves the machine code to the internal buffer.
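+// Both instructions and assembler commands are accepted; for example (an
+// illustrative snippet, not a test case):
+//
+//     .org 0x1000
+//     .entry main
+//     main:
+//         movq $0, %rax
+//         ret
+//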
+func (self *Assembler) Assemble(src string) error { + var err error + var buf []*ParsedLine + + /* parse the source */ + if buf, err = self.ps.Parse(src); err != nil { + return err + } + + /* create a new program */ + p := DefaultArch.CreateProgram() + defer p.Free() + + /* process every line */ + for _, self.line = range buf { + switch self.cc++; self.line.Kind { + case LineLabel : if err = self.assembleLabel (p, &self.line.Label) ; err != nil { return err } + case LineInstr : if err = self.assembleInstr (p, &self.line.Instruction) ; err != nil { return err } + case LineCommand : if err = self.assembleCommand (p, &self.line.Command) ; err != nil { return err } + default : panic("parser yields an invalid line kind") + } + } + + /* assemble the program */ + self.buf = p.Assemble(self.pc) + return nil +} diff --git a/vendor/github.com/cloudwego/iasm/x86_64/assembler_alias.go b/vendor/github.com/cloudwego/iasm/x86_64/assembler_alias.go new file mode 100644 index 000000000..e8f1f725c --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/x86_64/assembler_alias.go @@ -0,0 +1,49 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package x86_64 + +func alias_INT3(p *Program, vv ...interface{}) *Instruction { + if len(vv) == 0 { + return p.INT(3) + } else { + panic("instruction INT3 takes no operands") + } +} + +func alias_VCMPEQPS(p *Program, vv ...interface{}) *Instruction { + if len(vv) >= 3 { + return p.VCMPPS(0x00, vv[0], vv[1], vv[2], vv[3:]...) + } else { + panic("instruction VCMPEQPS takes 3 or 4 operands") + } +} + +func alias_VCMPTRUEPS(p *Program, vv ...interface{}) *Instruction { + if len(vv) >= 3 { + return p.VCMPPS(0x0f, vv[0], vv[1], vv[2], vv[3:]...) + } else { + panic("instruction VCMPTRUEPS takes 3 or 4 operands") + } +} + +var _InstructionAliases = map[string]_InstructionEncoder { + "int3" : alias_INT3, + "retq" : Instructions["ret"], + "movabsq" : Instructions["movq"], + "vcmpeqps" : alias_VCMPEQPS, + "vcmptrueps" : alias_VCMPTRUEPS, +} diff --git a/vendor/github.com/cloudwego/iasm/x86_64/eface.go b/vendor/github.com/cloudwego/iasm/x86_64/eface.go new file mode 100644 index 000000000..ad3967b4f --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/x86_64/eface.go @@ -0,0 +1,79 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package x86_64 + +import ( + `reflect` + `unsafe` +) + +type _GoType struct { + size uintptr + pdata uintptr + hash uint32 + flags uint8 + align uint8 + falign uint8 + kflags uint8 + traits unsafe.Pointer + gcdata *byte + str int32 + ptrx int32 +} + +const ( + _KindMask = (1 << 5) - 1 +) + +func (self *_GoType) kind() reflect.Kind { + return reflect.Kind(self.kflags & _KindMask) +} + +type _GoSlice struct { + ptr unsafe.Pointer + len int + cap int +} + +type _GoEface struct { + vt *_GoType + ptr unsafe.Pointer +} + +func (self *_GoEface) kind() reflect.Kind { + if self.vt != nil { + return self.vt.kind() + } else { + return reflect.Invalid + } +} + +func (self *_GoEface) toInt64() int64 { + if self.vt.size == 8 { + return *(*int64)(self.ptr) + } else if self.vt.size == 4 { + return int64(*(*int32)(self.ptr)) + } else if self.vt.size == 2 { + return int64(*(*int16)(self.ptr)) + } else { + return int64(*(*int8)(self.ptr)) + } +} + +func efaceOf(v interface{}) _GoEface { + return *(*_GoEface)(unsafe.Pointer(&v)) +} diff --git a/vendor/github.com/cloudwego/iasm/x86_64/encodings.go b/vendor/github.com/cloudwego/iasm/x86_64/encodings.go new file mode 100644 index 000000000..a0d96db92 --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/x86_64/encodings.go @@ -0,0 +1,691 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package x86_64 + +import ( + `encoding/binary` + `math` +) + +/** Operand Encoding Helpers **/ + +func imml(v interface{}) byte { + return byte(toImmAny(v) & 0x0f) +} + +func relv(v interface{}) int64 { + switch r := v.(type) { + case *Label : return 0 + case RelativeOffset : return int64(r) + default : panic("invalid relative offset") + } +} + +func addr(v interface{}) interface{} { + switch a := v.(*MemoryOperand).Addr; a.Type { + case Memory : return a.Memory + case Offset : return a.Offset + case Reference : return a.Reference + default : panic("invalid memory operand type") + } +} + +func bcode(v interface{}) byte { + if m, ok := v.(*MemoryOperand); !ok { + panic("v is not a memory operand") + } else if m.Broadcast == 0 { + return 0 + } else { + return 1 + } +} + +func vcode(v interface{}) byte { + switch r := v.(type) { + case XMMRegister : return byte(r) + case YMMRegister : return byte(r) + case ZMMRegister : return byte(r) + case MaskedRegister : return vcode(r.Reg) + default : panic("v is not a vector register") + } +} + +func kcode(v interface{}) byte { + switch r := v.(type) { + case KRegister : return byte(r) + case XMMRegister : return 0 + case YMMRegister : return 0 + case ZMMRegister : return 0 + case RegisterMask : return byte(r.K) + case MaskedRegister : return byte(r.Mask.K) + case *MemoryOperand : return toKcodeMem(r) + default : panic("v is not a maskable operand") + } +} + +func zcode(v interface{}) byte { + switch r := v.(type) { + case KRegister : return 0 + case XMMRegister : return 0 + case YMMRegister : return 0 + case ZMMRegister : return 0 + case RegisterMask : return toZcodeRegM(r) + case MaskedRegister : return toZcodeRegM(r.Mask) + case *MemoryOperand : return toZcodeMem(r) + default : panic("v is not a maskable operand") + } +} + +func lcode(v interface{}) byte { + switch r := v.(type) { + case Register8 : return byte(r & 0x07) + case Register16 : return byte(r & 0x07) + case Register32 : return byte(r & 0x07) + case Register64 : return byte(r & 0x07) + case KRegister : return byte(r & 0x07) + case MMRegister : return byte(r & 0x07) + case XMMRegister : return byte(r & 0x07) + case YMMRegister : return byte(r & 0x07) + case ZMMRegister : return byte(r & 0x07) + case MaskedRegister : return lcode(r.Reg) + default : panic("v is not a register") + } +} + +func hcode(v interface{}) byte { + switch r := v.(type) { + case Register8 : return byte(r >> 3) & 1 + case Register16 : return byte(r >> 3) & 1 + case Register32 : return byte(r >> 3) & 1 + case Register64 : return byte(r >> 3) & 1 + case KRegister : return byte(r >> 3) & 1 + case MMRegister : return byte(r >> 3) & 1 + case XMMRegister : return byte(r >> 3) & 1 + case YMMRegister : return byte(r >> 3) & 1 + case ZMMRegister : return byte(r >> 3) & 1 + case MaskedRegister : return hcode(r.Reg) + default : panic("v is not a register") + } +} + +func ecode(v interface{}) byte { + switch r := v.(type) { + case Register8 : return byte(r >> 4) & 1 + case Register16 : return byte(r >> 4) & 1 + case Register32 : return byte(r >> 4) & 1 + case Register64 : return byte(r >> 4) & 1 + case KRegister : return byte(r >> 4) & 1 + case MMRegister : return byte(r >> 4) & 1 + case XMMRegister : return byte(r >> 4) & 1 + case YMMRegister : return byte(r >> 4) & 1 + case ZMMRegister : return byte(r >> 4) & 1 + case MaskedRegister : return ecode(r.Reg) + default : panic("v is not a register") + } +} + +func hlcode(v interface{}) byte { + switch r := v.(type) { + case Register8 : return toHLcodeReg8(r) + case Register16 : 
return byte(r & 0x0f) + case Register32 : return byte(r & 0x0f) + case Register64 : return byte(r & 0x0f) + case KRegister : return byte(r & 0x0f) + case MMRegister : return byte(r & 0x0f) + case XMMRegister : return byte(r & 0x0f) + case YMMRegister : return byte(r & 0x0f) + case ZMMRegister : return byte(r & 0x0f) + case MaskedRegister : return hlcode(r.Reg) + default : panic("v is not a register") + } +} + +func ehcode(v interface{}) byte { + switch r := v.(type) { + case Register8 : return byte(r >> 3) & 0x03 + case Register16 : return byte(r >> 3) & 0x03 + case Register32 : return byte(r >> 3) & 0x03 + case Register64 : return byte(r >> 3) & 0x03 + case KRegister : return byte(r >> 3) & 0x03 + case MMRegister : return byte(r >> 3) & 0x03 + case XMMRegister : return byte(r >> 3) & 0x03 + case YMMRegister : return byte(r >> 3) & 0x03 + case ZMMRegister : return byte(r >> 3) & 0x03 + case MaskedRegister : return ehcode(r.Reg) + default : panic("v is not a register") + } +} + +func toImmAny(v interface{}) int64 { + if x, ok := asInt64(v); ok { + return x + } else { + panic("value is not an integer") + } +} + +func toHcodeOpt(v interface{}) byte { + if v == nil { + return 0 + } else { + return hcode(v) + } +} + +func toEcodeVMM(v interface{}, x byte) byte { + switch r := v.(type) { + case XMMRegister : return ecode(r) + case YMMRegister : return ecode(r) + case ZMMRegister : return ecode(r) + default : return x + } +} + +func toKcodeMem(v *MemoryOperand) byte { + if !v.Masked { + return 0 + } else { + return byte(v.Mask.K) + } +} + +func toZcodeMem(v *MemoryOperand) byte { + if !v.Masked || v.Mask.Z { + return 0 + } else { + return 1 + } +} + +func toZcodeRegM(v RegisterMask) byte { + if v.Z { + return 1 + } else { + return 0 + } +} + +func toHLcodeReg8(v Register8) byte { + switch v { + case AH: fallthrough + case BH: fallthrough + case CH: fallthrough + case DH: panic("ah/bh/ch/dh registers never use 4-bit encoding") + default: return byte(v & 0x0f) + } +} + +/** Instruction Encoding Helpers **/ + +const ( + _N_inst = 16 +) + +const ( + _F_rel1 = 1 << iota + _F_rel4 +) + +type _Encoding struct { + len int + flags int + bytes [_N_inst]byte + encoder func(m *_Encoding, v []interface{}) +} + +// buf ensures len + n <= len(bytes). +func (self *_Encoding) buf(n int) []byte { + if i := self.len; i + n > _N_inst { + panic("instruction too long") + } else { + return self.bytes[i:] + } +} + +// emit encodes a single byte. +func (self *_Encoding) emit(v byte) { + self.buf(1)[0] = v + self.len++ +} + +// imm1 encodes a single byte immediate value. +func (self *_Encoding) imm1(v int64) { + self.emit(byte(v)) +} + +// imm2 encodes a two-byte immediate value in little-endian. +func (self *_Encoding) imm2(v int64) { + binary.LittleEndian.PutUint16(self.buf(2), uint16(v)) + self.len += 2 +} + +// imm4 encodes a 4-byte immediate value in little-endian. +func (self *_Encoding) imm4(v int64) { + binary.LittleEndian.PutUint32(self.buf(4), uint32(v)) + self.len += 4 +} + +// imm8 encodes an 8-byte immediate value in little-endian. +func (self *_Encoding) imm8(v int64) { + binary.LittleEndian.PutUint64(self.buf(8), uint64(v)) + self.len += 8 +} + +// vex2 encodes a 2-byte or 3-byte VEX prefix. 
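+// As a worked example of the byte math below: with r = 0, vvvv = 0b0010,
+// lpp = 0, and no extension bits from rm, the 2-byte form is emitted as
+// 0xc5 followed by 0xf8 ^ (0b0010 << 3) = 0xe8.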
+// +// 2-byte VEX prefix: +// Requires: VEX.W = 0, VEX.mmmmm = 0b00001 and VEX.B = VEX.X = 0 +// +----------------+ +// Byte 0: | Bits 0-7: 0xc5 | +// +----------------+ +// +// +-----------+----------------+----------+--------------+ +// Byte 1: | Bit 7: ~R | Bits 3-6 ~vvvv | Bit 2: L | Bits 0-1: pp | +// +-----------+----------------+----------+--------------+ +// +// 3-byte VEX prefix: +// +----------------+ +// Byte 0: | Bits 0-7: 0xc4 | +// +----------------+ +// +// +-----------+-----------+-----------+-------------------+ +// Byte 1: | Bit 7: ~R | Bit 6: ~X | Bit 5: ~B | Bits 0-4: 0b00001 | +// +-----------+-----------+-----------+-------------------+ +// +// +----------+-----------------+----------+--------------+ +// Byte 2: | Bit 7: 0 | Bits 3-6: ~vvvv | Bit 2: L | Bits 0-1: pp | +// +----------+-----------------+----------+--------------+ +// +func (self *_Encoding) vex2(lpp byte, r byte, rm interface{}, vvvv byte) { + var b byte + var x byte + + /* VEX.R must be a single-bit mask */ + if r > 1 { + panic("VEX.R must be a 1-bit mask") + } + + /* VEX.Lpp must be a 3-bit mask */ + if lpp &^ 0b111 != 0 { + panic("VEX.Lpp must be a 3-bit mask") + } + + /* VEX.vvvv must be a 4-bit mask */ + if vvvv &^ 0b1111 != 0 { + panic("VEX.vvvv must be a 4-bit mask") + } + + /* encode the RM bits if any */ + if rm != nil { + switch v := rm.(type) { + case *Label : break + case Register : b = hcode(v) + case MemoryAddress : b, x = toHcodeOpt(v.Base), toHcodeOpt(v.Index) + case RelativeOffset : break + default : panic("rm is expected to be a register or a memory address") + } + } + + /* if VEX.B and VEX.X are zeroes, 2-byte VEX prefix can be used */ + if x == 0 && b == 0 { + self.emit(0xc5) + self.emit(0xf8 ^ (r << 7) ^ (vvvv << 3) ^ lpp) + } else { + self.emit(0xc4) + self.emit(0xe1 ^ (r << 7) ^ (x << 6) ^ (b << 5)) + self.emit(0x78 ^ (vvvv << 3) ^ lpp) + } +} + +// vex3 encodes a 3-byte VEX or XOP prefix. 
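+// As a worked example of the byte math below: esc = 0xc4, mmmmm = 0b00010,
+// wlpp = 0 and all extension bits clear emit 0xc4, then 0xe0 ^ 0b00010 = 0xe2,
+// then 0x78.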
+// +// 3-byte VEX/XOP prefix +// +-----------------------------------+ +// Byte 0: | Bits 0-7: 0xc4 (VEX) / 0x8f (XOP) | +// +-----------------------------------+ +// +// +-----------+-----------+-----------+-----------------+ +// Byte 1: | Bit 7: ~R | Bit 6: ~X | Bit 5: ~B | Bits 0-4: mmmmm | +// +-----------+-----------+-----------+-----------------+ +// +// +----------+-----------------+----------+--------------+ +// Byte 2: | Bit 7: W | Bits 3-6: ~vvvv | Bit 2: L | Bits 0-1: pp | +// +----------+-----------------+----------+--------------+ +// +func (self *_Encoding) vex3(esc byte, mmmmm byte, wlpp byte, r byte, rm interface{}, vvvv byte) { + var b byte + var x byte + + /* VEX.R must be a single-bit mask */ + if r > 1 { + panic("VEX.R must be a 1-bit mask") + } + + /* VEX.vvvv must be a 4-bit mask */ + if vvvv &^ 0b1111 != 0 { + panic("VEX.vvvv must be a 4-bit mask") + } + + /* escape must be a 3-byte VEX (0xc4) or XOP (0x8f) prefix */ + if esc != 0xc4 && esc != 0x8f { + panic("escape must be a 3-byte VEX (0xc4) or XOP (0x8f) prefix") + } + + /* VEX.W____Lpp is expected to have no bits set except 0, 1, 2 and 7 */ + if wlpp &^ 0b10000111 != 0 { + panic("VEX.W____Lpp is expected to have no bits set except 0, 1, 2 and 7") + } + + /* VEX.m-mmmm is expected to be a 5-bit mask */ + if mmmmm &^ 0b11111 != 0 { + panic("VEX.m-mmmm is expected to be a 5-bit mask") + } + + /* encode the RM bits */ + switch v := rm.(type) { + case *Label : break + case MemoryAddress : b, x = toHcodeOpt(v.Base), toHcodeOpt(v.Index) + case RelativeOffset : break + default : panic("rm is expected to be a register or a memory address") + } + + /* encode the 3-byte VEX or XOP prefix */ + self.emit(esc) + self.emit(0xe0 ^ (r << 7) ^ (x << 6) ^ (b << 5) ^ mmmmm) + self.emit(0x78 ^ (vvvv << 3) ^ wlpp) +} + +// evex encodes a 4-byte EVEX prefix. 
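+// As a worked example of the byte math below: mm = 0b01, w1pp = 0b100 and
+// all other fields zero yield p0 = 0x01, p1 = 0x04 and p2 = 0x00, which are
+// emitted after the inversions as 0x62, 0xf1, 0x7c, 0x08.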
+func (self *_Encoding) evex(mm byte, w1pp byte, ll byte, rr byte, rm interface{}, vvvvv byte, aaa byte, zz byte, bb byte) {
+ var b byte
+ var x byte
+
+ /* EVEX.b must be a single-bit mask */
+ if bb > 1 {
+ panic("EVEX.b must be a 1-bit mask")
+ }
+
+ /* EVEX.z must be a single-bit mask */
+ if zz > 1 {
+ panic("EVEX.z must be a 1-bit mask")
+ }
+
+ /* EVEX.mm must be a 2-bit mask */
+ if mm &^ 0b11 != 0 {
+ panic("EVEX.mm must be a 2-bit mask")
+ }
+
+ /* EVEX.L'L must be a 2-bit mask */
+ if ll &^ 0b11 != 0 {
+ panic("EVEX.L'L must be a 2-bit mask")
+ }
+
+ /* EVEX.R'R must be a 2-bit mask */
+ if rr &^ 0b11 != 0 {
+ panic("EVEX.R'R must be a 2-bit mask")
+ }
+
+ /* EVEX.aaa must be a 3-bit mask */
+ if aaa &^ 0b111 != 0 {
+ panic("EVEX.aaa must be a 3-bit mask")
+ }
+
+ /* EVEX.v'vvvv must be a 5-bit mask */
+ if vvvvv &^ 0b11111 != 0 {
+ panic("EVEX.v'vvvv must be a 5-bit mask")
+ }
+
+ /* EVEX.W____1pp must have bit 2 set, and no bits set other than 0, 1, 2 and 7 */
+ if w1pp &^ 0b10000011 != 0b100 {
+ panic("EVEX.W____1pp must have bit 2 set, and no bits set other than 0, 1, 2 and 7")
+ }
+
+ /* extract bits from EVEX.R'R and EVEX.v'vvvv */
+ r1, r0 := rr >> 1, rr & 1
+ v1, v0 := vvvvv >> 4, vvvvv & 0b1111
+
+ /* encode the RM bits if any */
+ if rm != nil {
+ switch m := rm.(type) {
+ case *Label : break
+ case Register : b, x = hcode(m), ecode(m)
+ case MemoryAddress : b, x, v1 = toHcodeOpt(m.Base), toHcodeOpt(m.Index), toEcodeVMM(m.Index, v1)
+ case RelativeOffset : break
+ default : panic("rm is expected to be a register or a memory address")
+ }
+ }
+
+ /* EVEX prefix bytes */
+ p0 := (r0 << 7) | (x << 6) | (b << 5) | (r1 << 4) | mm
+ p1 := (v0 << 3) | w1pp
+ p2 := (zz << 7) | (ll << 5) | (bb << 4) | (v1 << 3) | aaa
+
+ /* p0: invert RXBR' (bits 4-7)
+ * p1: invert vvvv (bits 3-6)
+ * p2: invert V' (bit 3) */
+ self.emit(0x62)
+ self.emit(p0 ^ 0xf0)
+ self.emit(p1 ^ 0x78)
+ self.emit(p2 ^ 0x08)
+}
+
+// rexm encodes a mandatory REX prefix.
+func (self *_Encoding) rexm(w byte, r byte, rm interface{}) {
+ var b byte
+ var x byte
+
+ /* REX.R must be 0 or 1 */
+ if r != 0 && r != 1 {
+ panic("REX.R must be 0 or 1")
+ }
+
+ /* REX.W must be 0 or 1 */
+ if w != 0 && w != 1 {
+ panic("REX.W must be 0 or 1")
+ }
+
+ /* encode the RM bits */
+ switch v := rm.(type) {
+ case *Label : break
+ case MemoryAddress : b, x = toHcodeOpt(v.Base), toHcodeOpt(v.Index)
+ case RelativeOffset : break
+ default : panic("rm is expected to be a register or a memory address")
+ }
+
+ /* encode the REX prefix */
+ self.emit(0x40 | (w << 3) | (r << 2) | (x << 1) | b)
+}
+
+// rexo encodes an optional REX prefix.
+func (self *_Encoding) rexo(r byte, rm interface{}, force bool) {
+ var b byte
+ var x byte
+
+ /* REX.R must be 0 or 1 */
+ if r != 0 && r != 1 {
+ panic("REX.R must be 0 or 1")
+ }
+
+ /* encode the RM bits */
+ switch v := rm.(type) {
+ case *Label : break
+ case Register : b = hcode(v)
+ case MemoryAddress : b, x = toHcodeOpt(v.Base), toHcodeOpt(v.Index)
+ case RelativeOffset : break
+ default : panic("rm is expected to be a register or a memory address")
+ }
+
+ /* if REX.R, REX.X, and REX.B are all zeroes, REX prefix can be omitted */
+ if force || r != 0 || x != 0 || b != 0 {
+ self.emit(0x40 | (r << 2) | (x << 1) | b)
+ }
+}
+
+// mrsd encodes ModR/M, SIB and Displacement.
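+// As a worked example of the paths below: reg = 0 with the address 8(%rsp)
+// and disp8v = 1 takes the SIB branch (rsp has L-code 0b100) and emits
+// 0x44, 0x24, 0x08: a ModR/M byte with Mode = 1, a no-index SIB with base
+// rsp, and an 8-bit displacement.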
+//
+// ModR/M byte
+// +----------------+---------------+---------------+
+// | Bits 6-7: Mode | Bits 3-5: Reg | Bits 0-2: R/M |
+// +----------------+---------------+---------------+
+//
+// SIB byte
+// +-----------------+-----------------+----------------+
+// | Bits 6-7: Scale | Bits 3-5: Index | Bits 0-2: Base |
+// +-----------------+-----------------+----------------+
+//
+func (self *_Encoding) mrsd(reg byte, rm interface{}, disp8v int32) {
+ var ok bool
+ var mm MemoryAddress
+ var ro RelativeOffset
+
+ /* ModRM encodes the lower 3 bits of the register */
+ if reg > 7 {
+ panic("invalid register bits")
+ }
+
+ /* check the displacement scale */
+ switch disp8v {
+ case 1: break
+ case 2: break
+ case 4: break
+ case 8: break
+ case 16: break
+ case 32: break
+ case 64: break
+ default: panic("invalid displacement size")
+ }
+
+ /* special case: unresolved labels, assuming a zero offset */
+ if _, ok = rm.(*Label); ok {
+ self.emit(0x05 | (reg << 3))
+ self.imm4(0)
+ return
+ }
+
+ /* special case: RIP-relative offset
+ * ModRM.Mode == 0 and ModRM.R/M == 5 indicates (rip + disp32) addressing */
+ if ro, ok = rm.(RelativeOffset); ok {
+ self.emit(0x05 | (reg << 3))
+ self.imm4(int64(ro))
+ return
+ }
+
+ /* must be a generic memory address */
+ if mm, ok = rm.(MemoryAddress); !ok {
+ panic("rm must be a memory address")
+ }
+
+ /* absolute addressing, encoded as disp(%rbp,%rsp,1) */
+ if mm.Base == nil && mm.Index == nil {
+ self.emit(0x04 | (reg << 3))
+ self.emit(0x25)
+ self.imm4(int64(mm.Displacement))
+ return
+ }
+
+ /* no SIB byte */
+ if mm.Index == nil && lcode(mm.Base) != 0b100 {
+ cc := lcode(mm.Base)
+ dv := mm.Displacement
+
+ /* ModRM.Mode == 0 (no displacement) */
+ if dv == 0 && mm.Base != RBP && mm.Base != R13 {
+ if cc == 0b101 {
+ panic("rbp/r13 is not encodable as a base register (interpreted as disp32 address)")
+ } else {
+ self.emit((reg << 3) | cc)
+ return
+ }
+ }
+
+ /* ModRM.Mode == 1 (8-bit displacement) */
+ if dq := dv / disp8v; dq >= math.MinInt8 && dq <= math.MaxInt8 && dv % disp8v == 0 {
+ self.emit(0x40 | (reg << 3) | cc)
+ self.imm1(int64(dq))
+ return
+ }
+
+ /* ModRM.Mode == 2 (32-bit displacement) */
+ self.emit(0x80 | (reg << 3) | cc)
+ self.imm4(int64(mm.Displacement))
+ return
+ }
+
+ /* all encodings below use ModRM.R/M = 4 (0b100) to indicate the presence of SIB */
+ if mm.Index == RSP {
+ panic("rsp is not encodable as an index register (interpreted as no index)")
+ }
+
+ /* index = 4 (0b100) denotes no-index encoding */
+ var scale byte
+ var index byte = 0x04
+
+ /* encode the scale byte */
+ if mm.Scale != 0 {
+ switch mm.Scale {
+ case 1 : scale = 0
+ case 2 : scale = 1
+ case 4 : scale = 2
+ case 8 : scale = 3
+ default : panic("invalid scale value")
+ }
+ }
+
+ /* encode the index byte */
+ if mm.Index != nil {
+ index = lcode(mm.Index)
+ }
+
+ /* SIB.Base = 5 (0b101) and ModRM.Mode = 0 indicates no-base encoding with disp32 */
+ if mm.Base == nil {
+ self.emit((reg << 3) | 0b100)
+ self.emit((scale << 6) | (index << 3) | 0b101)
+ self.imm4(int64(mm.Displacement))
+ return
+ }
+
+ /* base L-code & displacement value */
+ cc := lcode(mm.Base)
+ dv := mm.Displacement
+
+ /* ModRM.Mode == 0 (no displacement) */
+ if dv == 0 && cc != 0b101 {
+ self.emit((reg << 3) | 0b100)
+ self.emit((scale << 6) | (index << 3) | cc)
+ return
+ }
+
+ /* ModRM.Mode == 1 (8-bit displacement) */
+ if dq := dv / disp8v; dq >= math.MinInt8 && dq <= math.MaxInt8 && dv % disp8v == 0 {
+ self.emit(0x44 | (reg << 3))
+ self.emit((scale << 6) | (index << 3) |
cc) + self.imm1(int64(dq)) + return + } + + /* ModRM.Mode == 2 (32-bit displacement) */ + self.emit(0x84 | (reg << 3)) + self.emit((scale << 6) | (index << 3) | cc) + self.imm4(int64(mm.Displacement)) +} + +// encode invokes the encoder to encode this instruction. +func (self *_Encoding) encode(v []interface{}) int { + self.len = 0 + self.encoder(self, v) + return self.len +} diff --git a/vendor/github.com/cloudwego/iasm/x86_64/instructions.go b/vendor/github.com/cloudwego/iasm/x86_64/instructions.go new file mode 100644 index 000000000..d9c069035 --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/x86_64/instructions.go @@ -0,0 +1,97210 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Code generated by "mkasm_amd64.py", DO NOT EDIT. + +package x86_64 + +// ADCB performs "Add with Carry". +// +// Mnemonic : ADC +// Supported forms : (6 forms) +// +// * ADCB imm8, al +// * ADCB imm8, r8 +// * ADCB r8, r8 +// * ADCB m8, r8 +// * ADCB imm8, m8 +// * ADCB r8, m8 +// +func (self *Program) ADCB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADCB", 2, Operands { v0, v1 }) + // ADCB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x14) + m.imm1(toImmAny(v[0])) + }) + } + // ADCB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0x80) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ADCB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x10) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADCB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ADCB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x80) + m.mrsd(2, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ADCB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x10) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADCB") + } + return p +} + +// ADCL performs "Add with Carry". 
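+// Each generated encoder checks its operands against the supported forms at
+// run time and panics if none match; for instance, p.ADCL(1, EAX) should
+// select the "ADCL imm32, eax" form listed below (a usage sketch; p is a
+// *Program).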
+// +// Mnemonic : ADC +// Supported forms : (8 forms) +// +// * ADCL imm32, eax +// * ADCL imm8, r32 +// * ADCL imm32, r32 +// * ADCL r32, r32 +// * ADCL m32, r32 +// * ADCL imm8, m32 +// * ADCL imm32, m32 +// * ADCL r32, m32 +// +func (self *Program) ADCL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADCL", 2, Operands { v0, v1 }) + // ADCL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x15) + m.imm4(toImmAny(v[0])) + }) + } + // ADCL imm8, r32 + if isImm8Ext(v0, 4) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ADCL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xd0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // ADCL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x13) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADCL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x13) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ADCL imm8, m32 + if isImm8Ext(v0, 4) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(2, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ADCL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(2, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // ADCL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADCL") + } + return p +} + +// ADCQ performs "Add with Carry". 
+// +// Mnemonic : ADC +// Supported forms : (8 forms) +// +// * ADCQ imm32, rax +// * ADCQ imm8, r64 +// * ADCQ imm32, r64 +// * ADCQ r64, r64 +// * ADCQ m64, r64 +// * ADCQ imm8, m64 +// * ADCQ imm32, m64 +// * ADCQ r64, m64 +// +func (self *Program) ADCQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADCQ", 2, Operands { v0, v1 }) + // ADCQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x15) + m.imm4(toImmAny(v[0])) + }) + } + // ADCQ imm8, r64 + if isImm8Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x83) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ADCQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x81) + m.emit(0xd0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // ADCQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x13) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADCQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x13) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ADCQ imm8, m64 + if isImm8Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x83) + m.mrsd(2, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ADCQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x81) + m.mrsd(2, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // ADCQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADCQ") + } + return p +} + +// ADCW performs "Add with Carry". 
+// +// Mnemonic : ADC +// Supported forms : (8 forms) +// +// * ADCW imm16, ax +// * ADCW imm8, r16 +// * ADCW imm16, r16 +// * ADCW r16, r16 +// * ADCW m16, r16 +// * ADCW imm8, m16 +// * ADCW imm16, m16 +// * ADCW r16, m16 +// +func (self *Program) ADCW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADCW", 2, Operands { v0, v1 }) + // ADCW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x15) + m.imm2(toImmAny(v[0])) + }) + } + // ADCW imm8, r16 + if isImm8Ext(v0, 2) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ADCW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xd0 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // ADCW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x13) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADCW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x13) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ADCW imm8, m16 + if isImm8Ext(v0, 2) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(2, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ADCW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(2, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // ADCW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADCW") + } + return p +} + +// ADCXL performs "Unsigned Integer Addition of Two Operands with Carry Flag". 
+// +// Mnemonic : ADCX +// Supported forms : (2 forms) +// +// * ADCXL r32, r32 [ADX] +// * ADCXL m32, r32 [ADX] +// +func (self *Program) ADCXL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADCXL", 2, Operands { v0, v1 }) + // ADCXL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_ADX) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADCXL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_ADX) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADCXL") + } + return p +} + +// ADCXQ performs "Unsigned Integer Addition of Two Operands with Carry Flag". +// +// Mnemonic : ADCX +// Supported forms : (2 forms) +// +// * ADCXQ r64, r64 [ADX] +// * ADCXQ m64, r64 [ADX] +// +func (self *Program) ADCXQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADCXQ", 2, Operands { v0, v1 }) + // ADCXQ r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_ADX) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADCXQ m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_ADX) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADCXQ") + } + return p +} + +// ADDB performs "Add". 
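//
// A hypothetical byte-form sketch (p assumed to be a *Program; AL/CL assumed
// register constants). The encoders consult isReg8REX so that the REX-only
// byte registers (SPL/BPL/SIL/DIL) are emitted with a REX prefix:
//
//    p.ADDB(1, AL)     // short "imm8, al" form, opcode 0x04
//    p.ADDB(CL, AL)    // reg-reg form, 0x00 or 0x02 encodings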
+// +// Mnemonic : ADD +// Supported forms : (6 forms) +// +// * ADDB imm8, al +// * ADDB imm8, r8 +// * ADDB r8, r8 +// * ADDB m8, r8 +// * ADDB imm8, m8 +// * ADDB r8, m8 +// +func (self *Program) ADDB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDB", 2, Operands { v0, v1 }) + // ADDB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x04) + m.imm1(toImmAny(v[0])) + }) + } + // ADDB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0x80) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ADDB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x00) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x02) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x02) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ADDB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x80) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ADDB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x00) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDB") + } + return p +} + +// ADDL performs "Add". 
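//
// When an immediate fits in a sign-extended byte, the isImm8Ext check also
// registers the shorter 0x83 /0 ib encoding alongside the full imm32 form,
// presumably so the smallest candidate can be chosen at encode time. A
// hypothetical sketch (p assumed to be a *Program):
//
//    p.ADDL(1, EAX)         // candidates: 0x05 id and 0x83 /0 ib
//    p.ADDL(0x10000, EAX)   // only the imm32 forms match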
+// +// Mnemonic : ADD +// Supported forms : (8 forms) +// +// * ADDL imm32, eax +// * ADDL imm8, r32 +// * ADDL imm32, r32 +// * ADDL r32, r32 +// * ADDL m32, r32 +// * ADDL imm8, m32 +// * ADDL imm32, m32 +// * ADDL r32, m32 +// +func (self *Program) ADDL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDL", 2, Operands { v0, v1 }) + // ADDL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x05) + m.imm4(toImmAny(v[0])) + }) + } + // ADDL imm8, r32 + if isImm8Ext(v0, 4) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ADDL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xc0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // ADDL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x01) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x03) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x03) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ADDL imm8, m32 + if isImm8Ext(v0, 4) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ADDL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(0, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // ADDL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x01) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDL") + } + return p +} + +// ADDPD performs "Add Packed Double-Precision Floating-Point Values". +// +// Mnemonic : ADDPD +// Supported forms : (2 forms) +// +// * ADDPD xmm, xmm [SSE2] +// * ADDPD m128, xmm [SSE2] +// +func (self *Program) ADDPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDPD", 2, Operands { v0, v1 }) + // ADDPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDPD") + } + return p +} + +// ADDPS performs "Add Packed Single-Precision Floating-Point Values". 
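//
// A hypothetical packed-float sketch (p assumed to be a *Program; XMM0/XMM1
// assumed register constants); all four single-precision lanes are added at
// once:
//
//    p.ADDPS(XMM1, XMM0)    // XMM0[i] += XMM1[i] for i = 0..3; requires SSE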
+// +// Mnemonic : ADDPS +// Supported forms : (2 forms) +// +// * ADDPS xmm, xmm [SSE] +// * ADDPS m128, xmm [SSE] +// +func (self *Program) ADDPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDPS", 2, Operands { v0, v1 }) + // ADDPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDPS") + } + return p +} + +// ADDQ performs "Add". +// +// Mnemonic : ADD +// Supported forms : (8 forms) +// +// * ADDQ imm32, rax +// * ADDQ imm8, r64 +// * ADDQ imm32, r64 +// * ADDQ r64, r64 +// * ADDQ m64, r64 +// * ADDQ imm8, m64 +// * ADDQ imm32, m64 +// * ADDQ r64, m64 +// +func (self *Program) ADDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDQ", 2, Operands { v0, v1 }) + // ADDQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x05) + m.imm4(toImmAny(v[0])) + }) + } + // ADDQ imm8, r64 + if isImm8Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x83) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ADDQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x81) + m.emit(0xc0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // ADDQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x01) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x03) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x03) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ADDQ imm8, m64 + if isImm8Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x83) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ADDQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x81) + m.mrsd(0, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // ADDQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x01) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDQ") + } + return p +} + +// ADDSD performs "Add Scalar Double-Precision Floating-Point Values". 
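//
// A hypothetical scalar-double sketch (p assumed to be a *Program): only the
// low 64-bit lane is added and the upper lane of the destination is kept:
//
//    p.ADDSD(XMM1, XMM0)    // XMM0[0] += XMM1[0]; requires SSE2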
+// +// Mnemonic : ADDSD +// Supported forms : (2 forms) +// +// * ADDSD xmm, xmm [SSE2] +// * ADDSD m64, xmm [SSE2] +// +func (self *Program) ADDSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDSD", 2, Operands { v0, v1 }) + // ADDSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDSD") + } + return p +} + +// ADDSS performs "Add Scalar Single-Precision Floating-Point Values". +// +// Mnemonic : ADDSS +// Supported forms : (2 forms) +// +// * ADDSS xmm, xmm [SSE] +// * ADDSS m32, xmm [SSE] +// +func (self *Program) ADDSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDSS", 2, Operands { v0, v1 }) + // ADDSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDSS") + } + return p +} + +// ADDSUBPD performs "Packed Double-FP Add/Subtract". +// +// Mnemonic : ADDSUBPD +// Supported forms : (2 forms) +// +// * ADDSUBPD xmm, xmm [SSE3] +// * ADDSUBPD m128, xmm [SSE3] +// +func (self *Program) ADDSUBPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDSUBPD", 2, Operands { v0, v1 }) + // ADDSUBPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd0) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDSUBPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDSUBPD") + } + return p +} + +// ADDSUBPS performs "Packed Single-FP Add/Subtract". 
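//
// ADDSUBPS alternates per lane: even lanes are subtracted, odd lanes are
// added, which is the SSE3 primitive used in complex arithmetic. A
// hypothetical sketch (p assumed to be a *Program):
//
//    p.ADDSUBPS(XMM1, XMM0)    // XMM0[0] -= XMM1[0], XMM0[1] += XMM1[1], ...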
+// +// Mnemonic : ADDSUBPS +// Supported forms : (2 forms) +// +// * ADDSUBPS xmm, xmm [SSE3] +// * ADDSUBPS m128, xmm [SSE3] +// +func (self *Program) ADDSUBPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDSUBPS", 2, Operands { v0, v1 }) + // ADDSUBPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd0) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDSUBPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDSUBPS") + } + return p +} + +// ADDW performs "Add". +// +// Mnemonic : ADD +// Supported forms : (8 forms) +// +// * ADDW imm16, ax +// * ADDW imm8, r16 +// * ADDW imm16, r16 +// * ADDW r16, r16 +// * ADDW m16, r16 +// * ADDW imm8, m16 +// * ADDW imm16, m16 +// * ADDW r16, m16 +// +func (self *Program) ADDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDW", 2, Operands { v0, v1 }) + // ADDW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x05) + m.imm2(toImmAny(v[0])) + }) + } + // ADDW imm8, r16 + if isImm8Ext(v0, 2) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ADDW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xc0 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // ADDW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x01) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x03) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x03) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ADDW imm8, m16 + if isImm8Ext(v0, 2) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ADDW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(0, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // ADDW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x01) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDW") + } + return p +} + +// ADOXL performs "Unsigned Integer Addition of Two Operands with Overflow 
Flag". +// +// Mnemonic : ADOX +// Supported forms : (2 forms) +// +// * ADOXL r32, r32 [ADX] +// * ADOXL m32, r32 [ADX] +// +func (self *Program) ADOXL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADOXL", 2, Operands { v0, v1 }) + // ADOXL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_ADX) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADOXL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_ADX) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADOXL") + } + return p +} + +// ADOXQ performs "Unsigned Integer Addition of Two Operands with Overflow Flag". +// +// Mnemonic : ADOX +// Supported forms : (2 forms) +// +// * ADOXQ r64, r64 [ADX] +// * ADOXQ m64, r64 [ADX] +// +func (self *Program) ADOXQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADOXQ", 2, Operands { v0, v1 }) + // ADOXQ r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_ADX) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADOXQ m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_ADX) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADOXQ") + } + return p +} + +// AESDEC performs "Perform One Round of an AES Decryption Flow". +// +// Mnemonic : AESDEC +// Supported forms : (2 forms) +// +// * AESDEC xmm, xmm [AES] +// * AESDEC m128, xmm [AES] +// +func (self *Program) AESDEC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("AESDEC", 2, Operands { v0, v1 }) + // AESDEC xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xde) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // AESDEC m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xde) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for AESDEC") + } + return p +} + +// AESDECLAST performs "Perform Last Round of an AES Decryption Flow". 
+// +// Mnemonic : AESDECLAST +// Supported forms : (2 forms) +// +// * AESDECLAST xmm, xmm [AES] +// * AESDECLAST m128, xmm [AES] +// +func (self *Program) AESDECLAST(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("AESDECLAST", 2, Operands { v0, v1 }) + // AESDECLAST xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // AESDECLAST m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xdf) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for AESDECLAST") + } + return p +} + +// AESENC performs "Perform One Round of an AES Encryption Flow". +// +// Mnemonic : AESENC +// Supported forms : (2 forms) +// +// * AESENC xmm, xmm [AES] +// * AESENC m128, xmm [AES] +// +func (self *Program) AESENC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("AESENC", 2, Operands { v0, v1 }) + // AESENC xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // AESENC m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xdc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for AESENC") + } + return p +} + +// AESENCLAST performs "Perform Last Round of an AES Encryption Flow". +// +// Mnemonic : AESENCLAST +// Supported forms : (2 forms) +// +// * AESENCLAST xmm, xmm [AES] +// * AESENCLAST m128, xmm [AES] +// +func (self *Program) AESENCLAST(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("AESENCLAST", 2, Operands { v0, v1 }) + // AESENCLAST xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // AESENCLAST m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xdd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for AESENCLAST") + } + return p +} + +// AESIMC performs "Perform the AES InvMixColumn Transformation". 
+// +// Mnemonic : AESIMC +// Supported forms : (2 forms) +// +// * AESIMC xmm, xmm [AES] +// * AESIMC m128, xmm [AES] +// +func (self *Program) AESIMC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("AESIMC", 2, Operands { v0, v1 }) + // AESIMC xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // AESIMC m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xdb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for AESIMC") + } + return p +} + +// AESKEYGENASSIST performs "AES Round Key Generation Assist". +// +// Mnemonic : AESKEYGENASSIST +// Supported forms : (2 forms) +// +// * AESKEYGENASSIST imm8, xmm, xmm [AES] +// * AESKEYGENASSIST imm8, m128, xmm [AES] +// +func (self *Program) AESKEYGENASSIST(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("AESKEYGENASSIST", 3, Operands { v0, v1, v2 }) + // AESKEYGENASSIST imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // AESKEYGENASSIST imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for AESKEYGENASSIST") + } + return p +} + +// ANDB performs "Logical AND". 
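//
// A hypothetical sketch (p assumed to be a *Program): operand widths must
// agree with the mnemonic suffix, otherwise no form matches and the method
// panics with "invalid operands for ANDB":
//
//    p.ANDB(0x0f, AL)    // short "imm8, al" form, opcode 0x24
//    // p.ANDB(0x0f, AX) would panic: AX is not an 8-bit register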
+// +// Mnemonic : AND +// Supported forms : (6 forms) +// +// * ANDB imm8, al +// * ANDB imm8, r8 +// * ANDB r8, r8 +// * ANDB m8, r8 +// * ANDB imm8, m8 +// * ANDB r8, m8 +// +func (self *Program) ANDB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ANDB", 2, Operands { v0, v1 }) + // ANDB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x24) + m.imm1(toImmAny(v[0])) + }) + } + // ANDB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0x80) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ANDB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x20) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x22) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ANDB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x22) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ANDB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x80) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ANDB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x20) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDB") + } + return p +} + +// ANDL performs "Logical AND". 
+// +// Mnemonic : AND +// Supported forms : (8 forms) +// +// * ANDL imm32, eax +// * ANDL imm8, r32 +// * ANDL imm32, r32 +// * ANDL r32, r32 +// * ANDL m32, r32 +// * ANDL imm8, m32 +// * ANDL imm32, m32 +// * ANDL r32, m32 +// +func (self *Program) ANDL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ANDL", 2, Operands { v0, v1 }) + // ANDL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x25) + m.imm4(toImmAny(v[0])) + }) + } + // ANDL imm8, r32 + if isImm8Ext(v0, 4) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ANDL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xe0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // ANDL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x21) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ANDL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ANDL imm8, m32 + if isImm8Ext(v0, 4) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ANDL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(4, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // ANDL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x21) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDL") + } + return p +} + +// ANDNL performs "Logical AND NOT". +// +// Mnemonic : ANDN +// Supported forms : (2 forms) +// +// * ANDNL r32, r32, r32 [BMI] +// * ANDNL m32, r32, r32 [BMI] +// +func (self *Program) ANDNL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("ANDNL", 3, Operands { v0, v1, v2 }) + // ANDNL r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // ANDNL m32, r32, r32 + if isM32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x00, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf2) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDNL") + } + return p +} + +// ANDNPD performs "Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values". 
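//
// Note that the complemented operand is the destination: the result is
// (NOT dst) AND src, commonly used to clear masked lanes. A hypothetical
// sketch (p assumed to be a *Program):
//
//    p.ANDNPD(XMM1, XMM0)    // XMM0 = ^XMM0 & XMM1; requires SSE2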
+// +// Mnemonic : ANDNPD +// Supported forms : (2 forms) +// +// * ANDNPD xmm, xmm [SSE2] +// * ANDNPD m128, xmm [SSE2] +// +func (self *Program) ANDNPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ANDNPD", 2, Operands { v0, v1 }) + // ANDNPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x55) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ANDNPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x55) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDNPD") + } + return p +} + +// ANDNPS performs "Bitwise Logical AND NOT of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : ANDNPS +// Supported forms : (2 forms) +// +// * ANDNPS xmm, xmm [SSE] +// * ANDNPS m128, xmm [SSE] +// +func (self *Program) ANDNPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ANDNPS", 2, Operands { v0, v1 }) + // ANDNPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x55) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ANDNPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x55) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDNPS") + } + return p +} + +// ANDNQ performs "Logical AND NOT". +// +// Mnemonic : ANDN +// Supported forms : (2 forms) +// +// * ANDNQ r64, r64, r64 [BMI] +// * ANDNQ m64, r64, r64 [BMI] +// +func (self *Program) ANDNQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("ANDNQ", 3, Operands { v0, v1, v2 }) + // ANDNQ r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // ANDNQ m64, r64, r64 + if isM64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf2) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDNQ") + } + return p +} + +// ANDPD performs "Bitwise Logical AND of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : ANDPD +// Supported forms : (2 forms) +// +// * ANDPD xmm, xmm [SSE2] +// * ANDPD m128, xmm [SSE2] +// +func (self *Program) ANDPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ANDPD", 2, Operands { v0, v1 }) + // ANDPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x54) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ANDPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x54) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDPD") + } + return p +} + +// ANDPS performs "Bitwise Logical AND of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : ANDPS +// Supported forms : (2 forms) +// +// * ANDPS xmm, xmm [SSE] +// * ANDPS m128, xmm [SSE] +// +func (self *Program) ANDPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ANDPS", 2, Operands { v0, v1 }) + // ANDPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x54) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ANDPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x54) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDPS") + } + return p +} + +// ANDQ performs "Logical AND". 
+// +// Mnemonic : AND +// Supported forms : (8 forms) +// +// * ANDQ imm32, rax +// * ANDQ imm8, r64 +// * ANDQ imm32, r64 +// * ANDQ r64, r64 +// * ANDQ m64, r64 +// * ANDQ imm8, m64 +// * ANDQ imm32, m64 +// * ANDQ r64, m64 +// +func (self *Program) ANDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ANDQ", 2, Operands { v0, v1 }) + // ANDQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x25) + m.imm4(toImmAny(v[0])) + }) + } + // ANDQ imm8, r64 + if isImm8Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x83) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ANDQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x81) + m.emit(0xe0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // ANDQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x21) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ANDQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ANDQ imm8, m64 + if isImm8Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x83) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ANDQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x81) + m.mrsd(4, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // ANDQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x21) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDQ") + } + return p +} + +// ANDW performs "Logical AND". 
+// +// Mnemonic : AND +// Supported forms : (8 forms) +// +// * ANDW imm16, ax +// * ANDW imm8, r16 +// * ANDW imm16, r16 +// * ANDW r16, r16 +// * ANDW m16, r16 +// * ANDW imm8, m16 +// * ANDW imm16, m16 +// * ANDW r16, m16 +// +func (self *Program) ANDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ANDW", 2, Operands { v0, v1 }) + // ANDW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x25) + m.imm2(toImmAny(v[0])) + }) + } + // ANDW imm8, r16 + if isImm8Ext(v0, 2) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ANDW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xe0 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // ANDW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x21) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ANDW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ANDW imm8, m16 + if isImm8Ext(v0, 2) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ANDW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(4, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // ANDW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x21) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDW") + } + return p +} + +// BEXTR performs "Bit Field Extract". 
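//
// BEXTR extracts a bit field described by a control value: the low byte is
// the start bit, the second byte the field length. The immediate-control
// forms are the AMD TBM (XOP 0x8f) encodings; the register-control forms are
// BMI. A hypothetical sketch (p assumed to be a *Program):
//
//    p.BEXTR(0x0804, RAX, RCX)    // start=4, len=8: RCX = (RAX >> 4) & 0xff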
+// +// Mnemonic : BEXTR +// Supported forms : (8 forms) +// +// * BEXTR imm32, r32, r32 [TBM] +// * BEXTR imm32, m32, r32 [TBM] +// * BEXTR imm32, r64, r64 [TBM] +// * BEXTR imm32, m64, r64 [TBM] +// * BEXTR r32, r32, r32 [BMI] +// * BEXTR r32, m32, r32 [BMI] +// * BEXTR r64, r64, r64 [BMI] +// * BEXTR r64, m64, r64 [BMI] +// +func (self *Program) BEXTR(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("BEXTR", 3, Operands { v0, v1, v2 }) + // BEXTR imm32, r32, r32 + if isImm32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xea ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // BEXTR imm32, m32, r32 + if isImm32(v0) && isM32(v1) && isReg32(v2) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1010, 0x00, hcode(v[2]), addr(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // BEXTR imm32, r64, r64 + if isImm32(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xea ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xf8) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // BEXTR imm32, m64, r64 + if isImm32(v0) && isM64(v1) && isReg64(v2) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1010, 0x80, hcode(v[2]), addr(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // BEXTR r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // BEXTR r32, m32, r32 + if isReg32(v0) && isM32(v1) && isReg32(v2) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf7) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // BEXTR r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xf8 ^ (hlcode(v[0]) << 3)) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // BEXTR r64, m64, r64 + if isReg64(v0) && isM64(v1) && isReg64(v2) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x80, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf7) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BEXTR") + } + return p +} + +// BLCFILL performs "Fill From Lowest Clear Bit". 
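//
// The TBM bit-group instructions are built from x+1 / x-1 arithmetic;
// BLCFILL computes x & (x + 1), clearing the run of trailing one-bits below
// the lowest clear bit. A hypothetical sketch (p assumed to be a *Program):
//
//    p.BLCFILL(RAX, RCX)    // RCX = RAX & (RAX + 1); requires TBM (AMD)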
+// +// Mnemonic : BLCFILL +// Supported forms : (4 forms) +// +// * BLCFILL r32, r32 [TBM] +// * BLCFILL m32, r32 [TBM] +// * BLCFILL r64, r64 [TBM] +// * BLCFILL m64, r64 [TBM] +// +func (self *Program) BLCFILL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLCFILL", 2, Operands { v0, v1 }) + // BLCFILL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xc8 | lcode(v[0])) + }) + } + // BLCFILL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(1, addr(v[0]), 1) + }) + } + // BLCFILL r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xc8 | lcode(v[0])) + }) + } + // BLCFILL m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLCFILL") + } + return p +} + +// BLCI performs "Isolate Lowest Clear Bit". +// +// Mnemonic : BLCI +// Supported forms : (4 forms) +// +// * BLCI r32, r32 [TBM] +// * BLCI m32, r32 [TBM] +// * BLCI r64, r64 [TBM] +// * BLCI m64, r64 [TBM] +// +func (self *Program) BLCI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLCI", 2, Operands { v0, v1 }) + // BLCI r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x02) + m.emit(0xf0 | lcode(v[0])) + }) + } + // BLCI m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x02) + m.mrsd(6, addr(v[0]), 1) + }) + } + // BLCI r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x02) + m.emit(0xf0 | lcode(v[0])) + }) + } + // BLCI m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x02) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLCI") + } + return p +} + +// BLCIC performs "Isolate Lowest Set Bit and Complement". 
+// +// Mnemonic : BLCIC +// Supported forms : (4 forms) +// +// * BLCIC r32, r32 [TBM] +// * BLCIC m32, r32 [TBM] +// * BLCIC r64, r64 [TBM] +// * BLCIC m64, r64 [TBM] +// +func (self *Program) BLCIC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLCIC", 2, Operands { v0, v1 }) + // BLCIC r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xe8 | lcode(v[0])) + }) + } + // BLCIC m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(5, addr(v[0]), 1) + }) + } + // BLCIC r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xe8 | lcode(v[0])) + }) + } + // BLCIC m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(5, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLCIC") + } + return p +} + +// BLCMSK performs "Mask From Lowest Clear Bit". +// +// Mnemonic : BLCMSK +// Supported forms : (4 forms) +// +// * BLCMSK r32, r32 [TBM] +// * BLCMSK m32, r32 [TBM] +// * BLCMSK r64, r64 [TBM] +// * BLCMSK m64, r64 [TBM] +// +func (self *Program) BLCMSK(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLCMSK", 2, Operands { v0, v1 }) + // BLCMSK r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x02) + m.emit(0xc8 | lcode(v[0])) + }) + } + // BLCMSK m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x02) + m.mrsd(1, addr(v[0]), 1) + }) + } + // BLCMSK r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x02) + m.emit(0xc8 | lcode(v[0])) + }) + } + // BLCMSK m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x02) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLCMSK") + } + return p +} + +// BLCS performs "Set Lowest Clear Bit". 
+// +// Mnemonic : BLCS +// Supported forms : (4 forms) +// +// * BLCS r32, r32 [TBM] +// * BLCS m32, r32 [TBM] +// * BLCS r64, r64 [TBM] +// * BLCS m64, r64 [TBM] +// +func (self *Program) BLCS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLCS", 2, Operands { v0, v1 }) + // BLCS r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xd8 | lcode(v[0])) + }) + } + // BLCS m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(3, addr(v[0]), 1) + }) + } + // BLCS r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xd8 | lcode(v[0])) + }) + } + // BLCS m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLCS") + } + return p +} + +// BLENDPD performs "Blend Packed Double Precision Floating-Point Values". +// +// Mnemonic : BLENDPD +// Supported forms : (2 forms) +// +// * BLENDPD imm8, xmm, xmm [SSE4.1] +// * BLENDPD imm8, m128, xmm [SSE4.1] +// +func (self *Program) BLENDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("BLENDPD", 3, Operands { v0, v1, v2 }) + // BLENDPD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BLENDPD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0d) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for BLENDPD") + } + return p +} + +// BLENDPS performs " Blend Packed Single Precision Floating-Point Values". 
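//
// The imm8 is a per-lane select mask: a set bit i takes lane i from the
// source, a clear bit keeps the destination lane (bits 0-3 cover the four
// single-precision lanes). A hypothetical sketch (p assumed to be a *Program):
//
//    p.BLENDPS(0b0101, XMM1, XMM0)    // lanes 0 and 2 from XMM1; 1 and 3 kept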
+// +// Mnemonic : BLENDPS +// Supported forms : (2 forms) +// +// * BLENDPS imm8, xmm, xmm [SSE4.1] +// * BLENDPS imm8, m128, xmm [SSE4.1] +// +func (self *Program) BLENDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("BLENDPS", 3, Operands { v0, v1, v2 }) + // BLENDPS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BLENDPS imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0c) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for BLENDPS") + } + return p +} + +// BLENDVPD performs " Variable Blend Packed Double Precision Floating-Point Values". +// +// Mnemonic : BLENDVPD +// Supported forms : (2 forms) +// +// * BLENDVPD xmm0, xmm, xmm [SSE4.1] +// * BLENDVPD xmm0, m128, xmm [SSE4.1] +// +func (self *Program) BLENDVPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("BLENDVPD", 3, Operands { v0, v1, v2 }) + // BLENDVPD xmm0, xmm, xmm + if v0 == XMM0 && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // BLENDVPD xmm0, m128, xmm + if v0 == XMM0 && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLENDVPD") + } + return p +} + +// BLENDVPS performs " Variable Blend Packed Single Precision Floating-Point Values". +// +// Mnemonic : BLENDVPS +// Supported forms : (2 forms) +// +// * BLENDVPS xmm0, xmm, xmm [SSE4.1] +// * BLENDVPS xmm0, m128, xmm [SSE4.1] +// +func (self *Program) BLENDVPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("BLENDVPS", 3, Operands { v0, v1, v2 }) + // BLENDVPS xmm0, xmm, xmm + if v0 == XMM0 && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // BLENDVPS xmm0, m128, xmm + if v0 == XMM0 && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLENDVPS") + } + return p +} + +// BLSFILL performs "Fill From Lowest Set Bit". 
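//
// BLSFILL computes x | (x - 1), setting every bit below the lowest set bit.
// A hypothetical sketch (p assumed to be a *Program):
//
//    p.BLSFILL(RAX, RCX)    // RCX = RAX | (RAX - 1); requires TBM (AMD)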
+// +// Mnemonic : BLSFILL +// Supported forms : (4 forms) +// +// * BLSFILL r32, r32 [TBM] +// * BLSFILL m32, r32 [TBM] +// * BLSFILL r64, r64 [TBM] +// * BLSFILL m64, r64 [TBM] +// +func (self *Program) BLSFILL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLSFILL", 2, Operands { v0, v1 }) + // BLSFILL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xd0 | lcode(v[0])) + }) + } + // BLSFILL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(2, addr(v[0]), 1) + }) + } + // BLSFILL r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xd0 | lcode(v[0])) + }) + } + // BLSFILL m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLSFILL") + } + return p +} + +// BLSI performs "Isolate Lowest Set Bit". +// +// Mnemonic : BLSI +// Supported forms : (4 forms) +// +// * BLSI r32, r32 [BMI] +// * BLSI m32, r32 [BMI] +// * BLSI r64, r64 [BMI] +// * BLSI m64, r64 [BMI] +// +func (self *Program) BLSI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLSI", 2, Operands { v0, v1 }) + // BLSI r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0xf3) + m.emit(0xd8 | lcode(v[0])) + }) + } + // BLSI m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0xf3) + m.mrsd(3, addr(v[0]), 1) + }) + } + // BLSI r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0xf3) + m.emit(0xd8 | lcode(v[0])) + }) + } + // BLSI m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0xf3) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLSI") + } + return p +} + +// BLSIC performs "Isolate Lowest Set Bit and Complement". 
+// +// Mnemonic : BLSIC +// Supported forms : (4 forms) +// +// * BLSIC r32, r32 [TBM] +// * BLSIC m32, r32 [TBM] +// * BLSIC r64, r64 [TBM] +// * BLSIC m64, r64 [TBM] +// +func (self *Program) BLSIC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLSIC", 2, Operands { v0, v1 }) + // BLSIC r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xf0 | lcode(v[0])) + }) + } + // BLSIC m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(6, addr(v[0]), 1) + }) + } + // BLSIC r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xf0 | lcode(v[0])) + }) + } + // BLSIC m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLSIC") + } + return p +} + +// BLSMSK performs "Mask From Lowest Set Bit". +// +// Mnemonic : BLSMSK +// Supported forms : (4 forms) +// +// * BLSMSK r32, r32 [BMI] +// * BLSMSK m32, r32 [BMI] +// * BLSMSK r64, r64 [BMI] +// * BLSMSK m64, r64 [BMI] +// +func (self *Program) BLSMSK(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLSMSK", 2, Operands { v0, v1 }) + // BLSMSK r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0xf3) + m.emit(0xd0 | lcode(v[0])) + }) + } + // BLSMSK m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0xf3) + m.mrsd(2, addr(v[0]), 1) + }) + } + // BLSMSK r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0xf3) + m.emit(0xd0 | lcode(v[0])) + }) + } + // BLSMSK m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0xf3) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLSMSK") + } + return p +} + +// BLSR performs "Reset Lowest Set Bit". 
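+//
+// BLSR computes dst = src & (src - 1), clearing the lowest set bit; it replaces
+// the classic x &= x - 1 step of a bit-iteration loop with one instruction. A
+// sketch (register constants assumed from this package):
+//
+//     p.BLSR(EAX, EAX)   // BMI form "BLSR r32, r32": EAX &= EAX - 1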
+// +// Mnemonic : BLSR +// Supported forms : (4 forms) +// +// * BLSR r32, r32 [BMI] +// * BLSR m32, r32 [BMI] +// * BLSR r64, r64 [BMI] +// * BLSR m64, r64 [BMI] +// +func (self *Program) BLSR(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLSR", 2, Operands { v0, v1 }) + // BLSR r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0xf3) + m.emit(0xc8 | lcode(v[0])) + }) + } + // BLSR m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0xf3) + m.mrsd(1, addr(v[0]), 1) + }) + } + // BLSR r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0xf3) + m.emit(0xc8 | lcode(v[0])) + }) + } + // BLSR m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0xf3) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLSR") + } + return p +} + +// BSFL performs "Bit Scan Forward". +// +// Mnemonic : BSF +// Supported forms : (2 forms) +// +// * BSFL r32, r32 +// * BSFL m32, r32 +// +func (self *Program) BSFL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BSFL", 2, Operands { v0, v1 }) + // BSFL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // BSFL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BSFL") + } + return p +} + +// BSFQ performs "Bit Scan Forward". +// +// Mnemonic : BSF +// Supported forms : (2 forms) +// +// * BSFQ r64, r64 +// * BSFQ m64, r64 +// +func (self *Program) BSFQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BSFQ", 2, Operands { v0, v1 }) + // BSFQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // BSFQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xbc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BSFQ") + } + return p +} + +// BSFW performs "Bit Scan Forward". 
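+//
+// BSF leaves the destination undefined and sets ZF when the source is zero, so
+// the result should only be consumed after checking ZF. A sketch (the 16-bit
+// AX/CX register constants are assumed from this package):
+//
+//     p.BSFW(CX, AX)   // AX = index of the lowest set bit of CX; ZF = 1 if CX == 0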
+// +// Mnemonic : BSF +// Supported forms : (2 forms) +// +// * BSFW r16, r16 +// * BSFW m16, r16 +// +func (self *Program) BSFW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BSFW", 2, Operands { v0, v1 }) + // BSFW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // BSFW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BSFW") + } + return p +} + +// BSRL performs "Bit Scan Reverse". +// +// Mnemonic : BSR +// Supported forms : (2 forms) +// +// * BSRL r32, r32 +// * BSRL m32, r32 +// +func (self *Program) BSRL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BSRL", 2, Operands { v0, v1 }) + // BSRL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // BSRL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BSRL") + } + return p +} + +// BSRQ performs "Bit Scan Reverse". +// +// Mnemonic : BSR +// Supported forms : (2 forms) +// +// * BSRQ r64, r64 +// * BSRQ m64, r64 +// +func (self *Program) BSRQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BSRQ", 2, Operands { v0, v1 }) + // BSRQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // BSRQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xbd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BSRQ") + } + return p +} + +// BSRW performs "Bit Scan Reverse". +// +// Mnemonic : BSR +// Supported forms : (2 forms) +// +// * BSRW r16, r16 +// * BSRW m16, r16 +// +func (self *Program) BSRW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BSRW", 2, Operands { v0, v1 }) + // BSRW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // BSRW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BSRW") + } + return p +} + +// BSWAPL performs "Byte Swap". 
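+//
+// BSWAP reverses the byte order of a register in place and is the usual 32- or
+// 64-bit endianness swap. A sketch (EAX assumed from this package):
+//
+//     p.BSWAPL(EAX)   // EAX = bswap32(EAX)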
+// +// Mnemonic : BSWAP +// Supported forms : (1 form) +// +// * BSWAPL r32 +// +func (self *Program) BSWAPL(v0 interface{}) *Instruction { + p := self.alloc("BSWAPL", 1, Operands { v0 }) + // BSWAPL r32 + if isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0x0f) + m.emit(0xc8 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for BSWAPL") + } + return p +} + +// BSWAPQ performs "Byte Swap". +// +// Mnemonic : BSWAP +// Supported forms : (1 form) +// +// * BSWAPQ r64 +// +func (self *Program) BSWAPQ(v0 interface{}) *Instruction { + p := self.alloc("BSWAPQ", 1, Operands { v0 }) + // BSWAPQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xc8 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for BSWAPQ") + } + return p +} + +// BTCL performs "Bit Test and Complement". +// +// Mnemonic : BTC +// Supported forms : (4 forms) +// +// * BTCL imm8, r32 +// * BTCL r32, r32 +// * BTCL imm8, m32 +// * BTCL r32, m32 +// +func (self *Program) BTCL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTCL", 2, Operands { v0, v1 }) + // BTCL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTCL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTCL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTCL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xbb) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTCL") + } + return p +} + +// BTCQ performs "Bit Test and Complement". 
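+//
+// A sketch of the immediate form, assuming plain Go integers are accepted as
+// imm8 operands and RAX is the package's register constant:
+//
+//     p.BTCQ(63, RAX)   // CF = old bit 63 of RAX, then that bit is complemented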
+// +// Mnemonic : BTC +// Supported forms : (4 forms) +// +// * BTCQ imm8, r64 +// * BTCQ r64, r64 +// * BTCQ imm8, m64 +// * BTCQ r64, m64 +// +func (self *Program) BTCQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTCQ", 2, Operands { v0, v1 }) + // BTCQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTCQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTCQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTCQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0xbb) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTCQ") + } + return p +} + +// BTCW performs "Bit Test and Complement". +// +// Mnemonic : BTC +// Supported forms : (4 forms) +// +// * BTCW imm8, r16 +// * BTCW r16, r16 +// * BTCW imm8, m16 +// * BTCW r16, m16 +// +func (self *Program) BTCW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTCW", 2, Operands { v0, v1 }) + // BTCW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTCW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTCW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTCW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xbb) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTCW") + } + return p +} + +// BTL performs "Bit Test". 
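+//
+// BT only copies the selected bit into CF, leaving the operand unchanged, so it
+// is normally followed by a carry-consuming instruction such as SETC or ADC. A
+// sketch (operand conventions as in the other examples in this file):
+//
+//     p.BTL(3, ECX)   // CF = bit 3 of ECX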
+// +// Mnemonic : BT +// Supported forms : (4 forms) +// +// * BTL imm8, r32 +// * BTL r32, r32 +// * BTL imm8, m32 +// * BTL r32, m32 +// +func (self *Program) BTL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTL", 2, Operands { v0, v1 }) + // BTL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xa3) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTL") + } + return p +} + +// BTQ performs "Bit Test". +// +// Mnemonic : BT +// Supported forms : (4 forms) +// +// * BTQ imm8, r64 +// * BTQ r64, r64 +// * BTQ imm8, m64 +// * BTQ r64, m64 +// +func (self *Program) BTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTQ", 2, Operands { v0, v1 }) + // BTQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xa3) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTQ") + } + return p +} + +// BTRL performs "Bit Test and Reset". 
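+//
+// BTR copies the selected bit to CF and then clears it. A sketch of the
+// register form (for a register destination the bit index is taken mod 32):
+//
+//     p.BTRL(EDX, ECX)   // CF = bit (EDX mod 32) of ECX, then that bit is cleared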
+// +// Mnemonic : BTR +// Supported forms : (4 forms) +// +// * BTRL imm8, r32 +// * BTRL r32, r32 +// * BTRL imm8, m32 +// * BTRL r32, m32 +// +func (self *Program) BTRL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTRL", 2, Operands { v0, v1 }) + // BTRL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTRL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xb3) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTRL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(6, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTRL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xb3) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTRL") + } + return p +} + +// BTRQ performs "Bit Test and Reset". +// +// Mnemonic : BTR +// Supported forms : (4 forms) +// +// * BTRQ imm8, r64 +// * BTRQ r64, r64 +// * BTRQ imm8, m64 +// * BTRQ r64, m64 +// +func (self *Program) BTRQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTRQ", 2, Operands { v0, v1 }) + // BTRQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTRQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xb3) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTRQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(6, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTRQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0xb3) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTRQ") + } + return p +} + +// BTRW performs "Bit Test and Reset". 
+// +// Mnemonic : BTR +// Supported forms : (4 forms) +// +// * BTRW imm8, r16 +// * BTRW r16, r16 +// * BTRW imm8, m16 +// * BTRW r16, m16 +// +func (self *Program) BTRW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTRW", 2, Operands { v0, v1 }) + // BTRW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTRW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xb3) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTRW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(6, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTRW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xb3) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTRW") + } + return p +} + +// BTSL performs "Bit Test and Set". +// +// Mnemonic : BTS +// Supported forms : (4 forms) +// +// * BTSL imm8, r32 +// * BTSL r32, r32 +// * BTSL imm8, m32 +// * BTSL r32, m32 +// +func (self *Program) BTSL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTSL", 2, Operands { v0, v1 }) + // BTSL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTSL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xab) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTSL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTSL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xab) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTSL") + } + return p +} + +// BTSQ performs "Bit Test and Set". 
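+//
+// BTS copies the selected bit to CF and then sets it, the building block of
+// test-and-set sequences (on x86 the memory form needs a LOCK prefix to be
+// atomic). A sketch, with the immediate assumed to be a plain Go integer:
+//
+//     p.BTSQ(0, RDI)   // CF = old bit 0 of RDI, then bit 0 is set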
+// +// Mnemonic : BTS +// Supported forms : (4 forms) +// +// * BTSQ imm8, r64 +// * BTSQ r64, r64 +// * BTSQ imm8, m64 +// * BTSQ r64, m64 +// +func (self *Program) BTSQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTSQ", 2, Operands { v0, v1 }) + // BTSQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTSQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xab) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTSQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTSQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0xab) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTSQ") + } + return p +} + +// BTSW performs "Bit Test and Set". +// +// Mnemonic : BTS +// Supported forms : (4 forms) +// +// * BTSW imm8, r16 +// * BTSW r16, r16 +// * BTSW imm8, m16 +// * BTSW r16, m16 +// +func (self *Program) BTSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTSW", 2, Operands { v0, v1 }) + // BTSW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTSW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xab) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTSW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTSW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xab) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTSW") + } + return p +} + +// BTW performs "Bit Test". 
+// +// Mnemonic : BT +// Supported forms : (4 forms) +// +// * BTW imm8, r16 +// * BTW r16, r16 +// * BTW imm8, m16 +// * BTW r16, m16 +// +func (self *Program) BTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTW", 2, Operands { v0, v1 }) + // BTW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xa3) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTW") + } + return p +} + +// BZHI performs "Zero High Bits Starting with Specified Bit Position". +// +// Mnemonic : BZHI +// Supported forms : (4 forms) +// +// * BZHI r32, r32, r32 [BMI2] +// * BZHI r32, m32, r32 [BMI2] +// * BZHI r64, r64, r64 [BMI2] +// * BZHI r64, m64, r64 [BMI2] +// +func (self *Program) BZHI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("BZHI", 3, Operands { v0, v1, v2 }) + // BZHI r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // BZHI r32, m32, r32 + if isReg32(v0) && isM32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // BZHI r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xf8 ^ (hlcode(v[0]) << 3)) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // BZHI r64, m64, r64 + if isReg64(v0) && isM64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x80, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BZHI") + } + return p +} + +// CALL performs "Call Procedure". 
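+//
+// Besides the rel32 form listed below, the encoder body also accepts a label
+// operand and fixes it up as a 4-byte relative displacement at assembly time
+// (the _F_rel4 branch). Register- and memory-indirect calls are exposed
+// separately as CALLQ.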
+// +// Mnemonic : CALL +// Supported forms : (1 form) +// +// * CALL rel32 +// +func (self *Program) CALL(v0 interface{}) *Instruction { + p := self.alloc("CALL", 1, Operands { v0 }) + // CALL rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xe8) + m.imm4(relv(v[0])) + }) + } + // CALL label + if isLabel(v0) { + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0xe8) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for CALL") + } + return p +} + +// CALLQ performs "Call Procedure". +// +// Mnemonic : CALL +// Supported forms : (2 forms) +// +// * CALLQ r64 +// * CALLQ m64 +// +func (self *Program) CALLQ(v0 interface{}) *Instruction { + p := self.alloc("CALLQ", 1, Operands { v0 }) + // CALLQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xff) + m.emit(0xd0 | lcode(v[0])) + }) + } + // CALLQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xff) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CALLQ") + } + return p +} + +// CBTW performs "Convert Byte to Word". +// +// Mnemonic : CBW +// Supported forms : (1 form) +// +// * CBTW +// +func (self *Program) CBTW() *Instruction { + p := self.alloc("CBTW", 0, Operands { }) + // CBTW + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x98) + }) + return p +} + +// CLC performs "Clear Carry Flag". +// +// Mnemonic : CLC +// Supported forms : (1 form) +// +// * CLC +// +func (self *Program) CLC() *Instruction { + p := self.alloc("CLC", 0, Operands { }) + // CLC + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf8) + }) + return p +} + +// CLD performs "Clear Direction Flag". +// +// Mnemonic : CLD +// Supported forms : (1 form) +// +// * CLD +// +func (self *Program) CLD() *Instruction { + p := self.alloc("CLD", 0, Operands { }) + // CLD + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xfc) + }) + return p +} + +// CLFLUSH performs "Flush Cache Line". +// +// Mnemonic : CLFLUSH +// Supported forms : (1 form) +// +// * CLFLUSH m8 [CLFLUSH] +// +func (self *Program) CLFLUSH(v0 interface{}) *Instruction { + p := self.alloc("CLFLUSH", 1, Operands { v0 }) + // CLFLUSH m8 + if isM8(v0) { + self.require(ISA_CLFLUSH) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0xae) + m.mrsd(7, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CLFLUSH") + } + return p +} + +// CLFLUSHOPT performs "Flush Cache Line Optimized". +// +// Mnemonic : CLFLUSHOPT +// Supported forms : (1 form) +// +// * CLFLUSHOPT m8 [CLFLUSHOPT] +// +func (self *Program) CLFLUSHOPT(v0 interface{}) *Instruction { + p := self.alloc("CLFLUSHOPT", 1, Operands { v0 }) + // CLFLUSHOPT m8 + if isM8(v0) { + self.require(ISA_CLFLUSHOPT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0xae) + m.mrsd(7, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CLFLUSHOPT") + } + return p +} + +// CLTD performs "Convert Doubleword to Quadword". 
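+//
+// CLTD is the AT&T name for CDQ: it sign-extends EAX into EDX:EAX and
+// conventionally precedes a signed 32-bit divide. A sketch (IDIVL is assumed to
+// be generated elsewhere in this package):
+//
+//     p.CLTD()       // EDX:EAX = sign-extend(EAX)
+//     p.IDIVL(ECX)   // EAX = quotient, EDX = remainder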
+//
+// Mnemonic : CDQ
+// Supported forms : (1 form)
+//
+// * CLTD
+//
+func (self *Program) CLTD() *Instruction {
+ p := self.alloc("CLTD", 0, Operands { })
+ // CLTD
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x99)
+ })
+ return p
+}
+
+// CLTQ performs "Convert Doubleword to Quadword".
+//
+// Mnemonic : CDQE
+// Supported forms : (1 form)
+//
+// * CLTQ
+//
+func (self *Program) CLTQ() *Instruction {
+ p := self.alloc("CLTQ", 0, Operands { })
+ // CLTQ
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48)
+ m.emit(0x98)
+ })
+ return p
+}
+
+// CLWB performs "Cache Line Write Back".
+//
+// Mnemonic : CLWB
+// Supported forms : (1 form)
+//
+// * CLWB m8 [CLWB]
+//
+func (self *Program) CLWB(v0 interface{}) *Instruction {
+ p := self.alloc("CLWB", 1, Operands { v0 })
+ // CLWB m8
+ if isM8(v0) {
+ self.require(ISA_CLWB)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xae)
+ m.mrsd(6, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CLWB")
+ }
+ return p
+}
+
+// CLZERO performs "Zero-out 64-byte Cache Line".
+//
+// Mnemonic : CLZERO
+// Supported forms : (1 form)
+//
+// * CLZERO [CLZERO]
+//
+func (self *Program) CLZERO() *Instruction {
+ p := self.alloc("CLZERO", 0, Operands { })
+ // CLZERO
+ self.require(ISA_CLZERO)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x01)
+ m.emit(0xfc)
+ })
+ return p
+}
+
+// CMC performs "Complement Carry Flag".
+//
+// Mnemonic : CMC
+// Supported forms : (1 form)
+//
+// * CMC
+//
+func (self *Program) CMC() *Instruction {
+ p := self.alloc("CMC", 0, Operands { })
+ // CMC
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf5)
+ })
+ return p
+}
+
+// CMOVA performs "Move if above (CF == 0 and ZF == 0)".
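+//
+// CMOVcc moves only when its condition holds, which allows branchless selects.
+// A sketch of an unsigned minimum (CMPL is assumed to be generated elsewhere in
+// this package; operands are in AT&T order, source first):
+//
+//     p.CMPL(ECX, EAX)    // set flags from EAX - ECX
+//     p.CMOVA(ECX, EAX)   // EAX = ECX when EAX was above ECX, i.e. EAX = min(EAX, ECX)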
+// +// Mnemonic : CMOVA +// Supported forms : (6 forms) +// +// * CMOVA r16, r16 [CMOV] +// * CMOVA m16, r16 [CMOV] +// * CMOVA r32, r32 [CMOV] +// * CMOVA m32, r32 [CMOV] +// * CMOVA r64, r64 [CMOV] +// * CMOVA m64, r64 [CMOV] +// +func (self *Program) CMOVA(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVA", 2, Operands { v0, v1 }) + // CMOVA r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x47) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVA m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x47) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVA r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x47) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVA m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x47) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVA r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x47) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVA m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x47) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVA") + } + return p +} + +// CMOVAE performs "Move if above or equal (CF == 0)". 
+// +// Mnemonic : CMOVAE +// Supported forms : (6 forms) +// +// * CMOVAE r16, r16 [CMOV] +// * CMOVAE m16, r16 [CMOV] +// * CMOVAE r32, r32 [CMOV] +// * CMOVAE m32, r32 [CMOV] +// * CMOVAE r64, r64 [CMOV] +// * CMOVAE m64, r64 [CMOV] +// +func (self *Program) CMOVAE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVAE", 2, Operands { v0, v1 }) + // CMOVAE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVAE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVAE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVAE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVAE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVAE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVAE") + } + return p +} + +// CMOVB performs "Move if below (CF == 1)". 
+// +// Mnemonic : CMOVB +// Supported forms : (6 forms) +// +// * CMOVB r16, r16 [CMOV] +// * CMOVB m16, r16 [CMOV] +// * CMOVB r32, r32 [CMOV] +// * CMOVB m32, r32 [CMOV] +// * CMOVB r64, r64 [CMOV] +// * CMOVB m64, r64 [CMOV] +// +func (self *Program) CMOVB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVB", 2, Operands { v0, v1 }) + // CMOVB r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVB m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVB r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVB m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVB r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVB m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVB") + } + return p +} + +// CMOVBE performs "Move if below or equal (CF == 1 or ZF == 1)". 
+// +// Mnemonic : CMOVBE +// Supported forms : (6 forms) +// +// * CMOVBE r16, r16 [CMOV] +// * CMOVBE m16, r16 [CMOV] +// * CMOVBE r32, r32 [CMOV] +// * CMOVBE m32, r32 [CMOV] +// * CMOVBE r64, r64 [CMOV] +// * CMOVBE m64, r64 [CMOV] +// +func (self *Program) CMOVBE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVBE", 2, Operands { v0, v1 }) + // CMOVBE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x46) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVBE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x46) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVBE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x46) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVBE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x46) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVBE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x46) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVBE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x46) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVBE") + } + return p +} + +// CMOVC performs "Move if carry (CF == 1)". 
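+//
+// CMOVC is an encoding alias of CMOVB and CMOVNAE: all three test CF == 1 and
+// emit the same 0F 42 opcode, as the encoder bodies in this file show.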
+// +// Mnemonic : CMOVC +// Supported forms : (6 forms) +// +// * CMOVC r16, r16 [CMOV] +// * CMOVC m16, r16 [CMOV] +// * CMOVC r32, r32 [CMOV] +// * CMOVC m32, r32 [CMOV] +// * CMOVC r64, r64 [CMOV] +// * CMOVC m64, r64 [CMOV] +// +func (self *Program) CMOVC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVC", 2, Operands { v0, v1 }) + // CMOVC r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVC m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVC r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVC m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVC r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVC m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVC") + } + return p +} + +// CMOVE performs "Move if equal (ZF == 1)". 
+// +// Mnemonic : CMOVE +// Supported forms : (6 forms) +// +// * CMOVE r16, r16 [CMOV] +// * CMOVE m16, r16 [CMOV] +// * CMOVE r32, r32 [CMOV] +// * CMOVE m32, r32 [CMOV] +// * CMOVE r64, r64 [CMOV] +// * CMOVE m64, r64 [CMOV] +// +func (self *Program) CMOVE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVE", 2, Operands { v0, v1 }) + // CMOVE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVE") + } + return p +} + +// CMOVG performs "Move if greater (ZF == 0 and SF == OF)". 
+// +// Mnemonic : CMOVG +// Supported forms : (6 forms) +// +// * CMOVG r16, r16 [CMOV] +// * CMOVG m16, r16 [CMOV] +// * CMOVG r32, r32 [CMOV] +// * CMOVG m32, r32 [CMOV] +// * CMOVG r64, r64 [CMOV] +// * CMOVG m64, r64 [CMOV] +// +func (self *Program) CMOVG(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVG", 2, Operands { v0, v1 }) + // CMOVG r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVG m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVG r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVG m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVG r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVG m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVG") + } + return p +} + +// CMOVGE performs "Move if greater or equal (SF == OF)". 
+// +// Mnemonic : CMOVGE +// Supported forms : (6 forms) +// +// * CMOVGE r16, r16 [CMOV] +// * CMOVGE m16, r16 [CMOV] +// * CMOVGE r32, r32 [CMOV] +// * CMOVGE m32, r32 [CMOV] +// * CMOVGE r64, r64 [CMOV] +// * CMOVGE m64, r64 [CMOV] +// +func (self *Program) CMOVGE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVGE", 2, Operands { v0, v1 }) + // CMOVGE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVGE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVGE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVGE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVGE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVGE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVGE") + } + return p +} + +// CMOVL performs "Move if less (SF != OF)". 
+// +// Mnemonic : CMOVL +// Supported forms : (6 forms) +// +// * CMOVL r16, r16 [CMOV] +// * CMOVL m16, r16 [CMOV] +// * CMOVL r32, r32 [CMOV] +// * CMOVL m32, r32 [CMOV] +// * CMOVL r64, r64 [CMOV] +// * CMOVL m64, r64 [CMOV] +// +func (self *Program) CMOVL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVL", 2, Operands { v0, v1 }) + // CMOVL r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVL m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVL r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVL m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVL") + } + return p +} + +// CMOVLE performs "Move if less or equal (ZF == 1 or SF != OF)". 
+// +// Mnemonic : CMOVLE +// Supported forms : (6 forms) +// +// * CMOVLE r16, r16 [CMOV] +// * CMOVLE m16, r16 [CMOV] +// * CMOVLE r32, r32 [CMOV] +// * CMOVLE m32, r32 [CMOV] +// * CMOVLE r64, r64 [CMOV] +// * CMOVLE m64, r64 [CMOV] +// +func (self *Program) CMOVLE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVLE", 2, Operands { v0, v1 }) + // CMOVLE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVLE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVLE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVLE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVLE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVLE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVLE") + } + return p +} + +// CMOVNA performs "Move if not above (CF == 1 or ZF == 1)". 
+// +// Mnemonic : CMOVNA +// Supported forms : (6 forms) +// +// * CMOVNA r16, r16 [CMOV] +// * CMOVNA m16, r16 [CMOV] +// * CMOVNA r32, r32 [CMOV] +// * CMOVNA m32, r32 [CMOV] +// * CMOVNA r64, r64 [CMOV] +// * CMOVNA m64, r64 [CMOV] +// +func (self *Program) CMOVNA(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNA", 2, Operands { v0, v1 }) + // CMOVNA r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x46) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNA m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x46) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNA r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x46) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNA m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x46) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNA r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x46) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNA m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x46) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNA") + } + return p +} + +// CMOVNAE performs "Move if not above or equal (CF == 1)". 
+// +// Mnemonic : CMOVNAE +// Supported forms : (6 forms) +// +// * CMOVNAE r16, r16 [CMOV] +// * CMOVNAE m16, r16 [CMOV] +// * CMOVNAE r32, r32 [CMOV] +// * CMOVNAE m32, r32 [CMOV] +// * CMOVNAE r64, r64 [CMOV] +// * CMOVNAE m64, r64 [CMOV] +// +func (self *Program) CMOVNAE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNAE", 2, Operands { v0, v1 }) + // CMOVNAE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNAE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNAE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNAE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNAE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNAE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNAE") + } + return p +} + +// CMOVNB performs "Move if not below (CF == 0)". 
+// +// Mnemonic : CMOVNB +// Supported forms : (6 forms) +// +// * CMOVNB r16, r16 [CMOV] +// * CMOVNB m16, r16 [CMOV] +// * CMOVNB r32, r32 [CMOV] +// * CMOVNB m32, r32 [CMOV] +// * CMOVNB r64, r64 [CMOV] +// * CMOVNB m64, r64 [CMOV] +// +func (self *Program) CMOVNB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNB", 2, Operands { v0, v1 }) + // CMOVNB r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNB m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNB r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNB m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNB r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNB m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNB") + } + return p +} + +// CMOVNBE performs "Move if not below or equal (CF == 0 and ZF == 0)". 
+// +// Mnemonic : CMOVNBE +// Supported forms : (6 forms) +// +// * CMOVNBE r16, r16 [CMOV] +// * CMOVNBE m16, r16 [CMOV] +// * CMOVNBE r32, r32 [CMOV] +// * CMOVNBE m32, r32 [CMOV] +// * CMOVNBE r64, r64 [CMOV] +// * CMOVNBE m64, r64 [CMOV] +// +func (self *Program) CMOVNBE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNBE", 2, Operands { v0, v1 }) + // CMOVNBE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x47) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNBE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x47) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNBE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x47) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNBE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x47) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNBE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x47) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNBE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x47) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNBE") + } + return p +} + +// CMOVNC performs "Move if not carry (CF == 0)". 
+// +// Mnemonic : CMOVNC +// Supported forms : (6 forms) +// +// * CMOVNC r16, r16 [CMOV] +// * CMOVNC m16, r16 [CMOV] +// * CMOVNC r32, r32 [CMOV] +// * CMOVNC m32, r32 [CMOV] +// * CMOVNC r64, r64 [CMOV] +// * CMOVNC m64, r64 [CMOV] +// +func (self *Program) CMOVNC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNC", 2, Operands { v0, v1 }) + // CMOVNC r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNC m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNC r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNC m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNC r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNC m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNC") + } + return p +} + +// CMOVNE performs "Move if not equal (ZF == 0)". 
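+//
+// The memory forms load conditionally; a sketch assuming Ptr is this
+// package's memory-operand constructor (name and signature assumed):
+//
+//     p.CMOVNE(Ptr(RAX, 8), RBX)    // RBX = *(RAX + 8) when ZF == 0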
+// +// Mnemonic : CMOVNE +// Supported forms : (6 forms) +// +// * CMOVNE r16, r16 [CMOV] +// * CMOVNE m16, r16 [CMOV] +// * CMOVNE r32, r32 [CMOV] +// * CMOVNE m32, r32 [CMOV] +// * CMOVNE r64, r64 [CMOV] +// * CMOVNE m64, r64 [CMOV] +// +func (self *Program) CMOVNE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNE", 2, Operands { v0, v1 }) + // CMOVNE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x45) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x45) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x45) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x45) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x45) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x45) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNE") + } + return p +} + +// CMOVNG performs "Move if not greater (ZF == 1 or SF != OF)". 
+// +// Mnemonic : CMOVNG +// Supported forms : (6 forms) +// +// * CMOVNG r16, r16 [CMOV] +// * CMOVNG m16, r16 [CMOV] +// * CMOVNG r32, r32 [CMOV] +// * CMOVNG m32, r32 [CMOV] +// * CMOVNG r64, r64 [CMOV] +// * CMOVNG m64, r64 [CMOV] +// +func (self *Program) CMOVNG(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNG", 2, Operands { v0, v1 }) + // CMOVNG r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNG m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNG r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNG m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNG r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNG m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNG") + } + return p +} + +// CMOVNGE performs "Move if not greater or equal (SF != OF)". 
+// +// Mnemonic : CMOVNGE +// Supported forms : (6 forms) +// +// * CMOVNGE r16, r16 [CMOV] +// * CMOVNGE m16, r16 [CMOV] +// * CMOVNGE r32, r32 [CMOV] +// * CMOVNGE m32, r32 [CMOV] +// * CMOVNGE r64, r64 [CMOV] +// * CMOVNGE m64, r64 [CMOV] +// +func (self *Program) CMOVNGE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNGE", 2, Operands { v0, v1 }) + // CMOVNGE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNGE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNGE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNGE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNGE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNGE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNGE") + } + return p +} + +// CMOVNL performs "Move if not less (SF == OF)". 
+// +// Mnemonic : CMOVNL +// Supported forms : (6 forms) +// +// * CMOVNL r16, r16 [CMOV] +// * CMOVNL m16, r16 [CMOV] +// * CMOVNL r32, r32 [CMOV] +// * CMOVNL m32, r32 [CMOV] +// * CMOVNL r64, r64 [CMOV] +// * CMOVNL m64, r64 [CMOV] +// +func (self *Program) CMOVNL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNL", 2, Operands { v0, v1 }) + // CMOVNL r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNL m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNL r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNL m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNL") + } + return p +} + +// CMOVNLE performs "Move if not less or equal (ZF == 0 and SF == OF)". 
+// +// Mnemonic : CMOVNLE +// Supported forms : (6 forms) +// +// * CMOVNLE r16, r16 [CMOV] +// * CMOVNLE m16, r16 [CMOV] +// * CMOVNLE r32, r32 [CMOV] +// * CMOVNLE m32, r32 [CMOV] +// * CMOVNLE r64, r64 [CMOV] +// * CMOVNLE m64, r64 [CMOV] +// +func (self *Program) CMOVNLE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNLE", 2, Operands { v0, v1 }) + // CMOVNLE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNLE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNLE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNLE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNLE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNLE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNLE") + } + return p +} + +// CMOVNO performs "Move if not overflow (OF == 0)". 
+// +// Mnemonic : CMOVNO +// Supported forms : (6 forms) +// +// * CMOVNO r16, r16 [CMOV] +// * CMOVNO m16, r16 [CMOV] +// * CMOVNO r32, r32 [CMOV] +// * CMOVNO m32, r32 [CMOV] +// * CMOVNO r64, r64 [CMOV] +// * CMOVNO m64, r64 [CMOV] +// +func (self *Program) CMOVNO(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNO", 2, Operands { v0, v1 }) + // CMOVNO r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x41) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNO m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x41) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNO r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x41) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNO m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x41) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNO r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x41) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNO m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x41) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNO") + } + return p +} + +// CMOVNP performs "Move if not parity (PF == 0)". 
+// +// Mnemonic : CMOVNP +// Supported forms : (6 forms) +// +// * CMOVNP r16, r16 [CMOV] +// * CMOVNP m16, r16 [CMOV] +// * CMOVNP r32, r32 [CMOV] +// * CMOVNP m32, r32 [CMOV] +// * CMOVNP r64, r64 [CMOV] +// * CMOVNP m64, r64 [CMOV] +// +func (self *Program) CMOVNP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNP", 2, Operands { v0, v1 }) + // CMOVNP r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNP m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNP r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNP m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNP r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNP m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNP") + } + return p +} + +// CMOVNS performs "Move if not sign (SF == 0)". 
+// +// Mnemonic : CMOVNS +// Supported forms : (6 forms) +// +// * CMOVNS r16, r16 [CMOV] +// * CMOVNS m16, r16 [CMOV] +// * CMOVNS r32, r32 [CMOV] +// * CMOVNS m32, r32 [CMOV] +// * CMOVNS r64, r64 [CMOV] +// * CMOVNS m64, r64 [CMOV] +// +func (self *Program) CMOVNS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNS", 2, Operands { v0, v1 }) + // CMOVNS r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x49) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNS m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x49) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNS r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x49) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNS m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x49) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNS r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x49) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNS m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x49) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNS") + } + return p +} + +// CMOVNZ performs "Move if not zero (ZF == 0)". 
+// +// Mnemonic : CMOVNZ +// Supported forms : (6 forms) +// +// * CMOVNZ r16, r16 [CMOV] +// * CMOVNZ m16, r16 [CMOV] +// * CMOVNZ r32, r32 [CMOV] +// * CMOVNZ m32, r32 [CMOV] +// * CMOVNZ r64, r64 [CMOV] +// * CMOVNZ m64, r64 [CMOV] +// +func (self *Program) CMOVNZ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNZ", 2, Operands { v0, v1 }) + // CMOVNZ r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x45) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNZ m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x45) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNZ r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x45) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNZ m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x45) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNZ r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x45) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNZ m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x45) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNZ") + } + return p +} + +// CMOVO performs "Move if overflow (OF == 1)". 
+// +// Mnemonic : CMOVO +// Supported forms : (6 forms) +// +// * CMOVO r16, r16 [CMOV] +// * CMOVO m16, r16 [CMOV] +// * CMOVO r32, r32 [CMOV] +// * CMOVO m32, r32 [CMOV] +// * CMOVO r64, r64 [CMOV] +// * CMOVO m64, r64 [CMOV] +// +func (self *Program) CMOVO(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVO", 2, Operands { v0, v1 }) + // CMOVO r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x40) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVO m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x40) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVO r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x40) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVO m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x40) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVO r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x40) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVO m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x40) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVO") + } + return p +} + +// CMOVP performs "Move if parity (PF == 1)". 
+// +// Mnemonic : CMOVP +// Supported forms : (6 forms) +// +// * CMOVP r16, r16 [CMOV] +// * CMOVP m16, r16 [CMOV] +// * CMOVP r32, r32 [CMOV] +// * CMOVP m32, r32 [CMOV] +// * CMOVP r64, r64 [CMOV] +// * CMOVP m64, r64 [CMOV] +// +func (self *Program) CMOVP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVP", 2, Operands { v0, v1 }) + // CMOVP r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVP m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVP r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVP m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVP r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVP m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVP") + } + return p +} + +// CMOVPE performs "Move if parity even (PF == 1)". 
+// +// Mnemonic : CMOVPE +// Supported forms : (6 forms) +// +// * CMOVPE r16, r16 [CMOV] +// * CMOVPE m16, r16 [CMOV] +// * CMOVPE r32, r32 [CMOV] +// * CMOVPE m32, r32 [CMOV] +// * CMOVPE r64, r64 [CMOV] +// * CMOVPE m64, r64 [CMOV] +// +func (self *Program) CMOVPE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVPE", 2, Operands { v0, v1 }) + // CMOVPE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVPE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVPE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVPE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVPE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVPE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVPE") + } + return p +} + +// CMOVPO performs "Move if parity odd (PF == 0)". 
+// +// Mnemonic : CMOVPO +// Supported forms : (6 forms) +// +// * CMOVPO r16, r16 [CMOV] +// * CMOVPO m16, r16 [CMOV] +// * CMOVPO r32, r32 [CMOV] +// * CMOVPO m32, r32 [CMOV] +// * CMOVPO r64, r64 [CMOV] +// * CMOVPO m64, r64 [CMOV] +// +func (self *Program) CMOVPO(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVPO", 2, Operands { v0, v1 }) + // CMOVPO r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVPO m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVPO r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVPO m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVPO r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVPO m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVPO") + } + return p +} + +// CMOVS performs "Move if sign (SF == 1)". 
+// +// Mnemonic : CMOVS +// Supported forms : (6 forms) +// +// * CMOVS r16, r16 [CMOV] +// * CMOVS m16, r16 [CMOV] +// * CMOVS r32, r32 [CMOV] +// * CMOVS m32, r32 [CMOV] +// * CMOVS r64, r64 [CMOV] +// * CMOVS m64, r64 [CMOV] +// +func (self *Program) CMOVS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVS", 2, Operands { v0, v1 }) + // CMOVS r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x48) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVS m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x48) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVS r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x48) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVS m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x48) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVS r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x48) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVS m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x48) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVS") + } + return p +} + +// CMOVZ performs "Move if zero (ZF == 1)". 
+// +// Mnemonic : CMOVZ +// Supported forms : (6 forms) +// +// * CMOVZ r16, r16 [CMOV] +// * CMOVZ m16, r16 [CMOV] +// * CMOVZ r32, r32 [CMOV] +// * CMOVZ m32, r32 [CMOV] +// * CMOVZ r64, r64 [CMOV] +// * CMOVZ m64, r64 [CMOV] +// +func (self *Program) CMOVZ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVZ", 2, Operands { v0, v1 }) + // CMOVZ r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVZ m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVZ r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVZ m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVZ r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVZ m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVZ") + } + return p +} + +// CMPB performs "Compare Two Operands". 
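+//
+// A minimal sketch, assuming a *Program p from this package; the AL
+// destination selects the short accumulator form:
+//
+//     p.CMPB(0x20, AL)    // imm8, al -> 3C ib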
+// +// Mnemonic : CMP +// Supported forms : (6 forms) +// +// * CMPB imm8, al +// * CMPB imm8, r8 +// * CMPB r8, r8 +// * CMPB m8, r8 +// * CMPB imm8, m8 +// * CMPB r8, m8 +// +func (self *Program) CMPB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMPB", 2, Operands { v0, v1 }) + // CMPB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x3c) + m.imm1(toImmAny(v[0])) + }) + } + // CMPB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0x80) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // CMPB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x38) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMPB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x3a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMPB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x80) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // CMPB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x38) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPB") + } + return p +} + +// CMPL performs "Compare Two Operands". 
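+//
+// An immediate that fits in a sign-extended byte selects the shorter
+// 0x83 encoding, a full 32-bit immediate the 0x81 one; a sketch (ECX
+// assumed among the package's register constants):
+//
+//     p.CMPL(1, ECX)             // imm8, r32  -> 83 /7 ib
+//     p.CMPL(0x12345678, ECX)    // imm32, r32 -> 81 /7 id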
+// +// Mnemonic : CMP +// Supported forms : (8 forms) +// +// * CMPL imm32, eax +// * CMPL imm8, r32 +// * CMPL imm32, r32 +// * CMPL r32, r32 +// * CMPL m32, r32 +// * CMPL imm8, m32 +// * CMPL imm32, m32 +// * CMPL r32, m32 +// +func (self *Program) CMPL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMPL", 2, Operands { v0, v1 }) + // CMPL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x3d) + m.imm4(toImmAny(v[0])) + }) + } + // CMPL imm8, r32 + if isImm8Ext(v0, 4) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // CMPL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xf8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // CMPL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x39) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMPL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x3b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMPL imm8, m32 + if isImm8Ext(v0, 4) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // CMPL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(7, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // CMPL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x39) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPL") + } + return p +} + +// CMPPD performs "Compare Packed Double-Precision Floating-Point Values". 
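+//
+// The imm8 operand is the comparison predicate (0 = EQ, 1 = LT, 2 = LE,
+// 3 = UNORD, 4 = NEQ, ...); a sketch assuming XMM register constants:
+//
+//     p.CMPPD(2, XMM1, XMM0)    // per-lane mask of XMM0 <= XMM1 into XMM0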
+// +// Mnemonic : CMPPD +// Supported forms : (2 forms) +// +// * CMPPD imm8, xmm, xmm [SSE2] +// * CMPPD imm8, m128, xmm [SSE2] +// +func (self *Program) CMPPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("CMPPD", 3, Operands { v0, v1, v2 }) + // CMPPD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // CMPPD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc2) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for CMPPD") + } + return p +} + +// CMPPS performs "Compare Packed Single-Precision Floating-Point Values". +// +// Mnemonic : CMPPS +// Supported forms : (2 forms) +// +// * CMPPS imm8, xmm, xmm [SSE] +// * CMPPS imm8, m128, xmm [SSE] +// +func (self *Program) CMPPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("CMPPS", 3, Operands { v0, v1, v2 }) + // CMPPS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // CMPPS imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc2) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for CMPPS") + } + return p +} + +// CMPQ performs "Compare Two Operands". 
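+//
+// A sketch of the 64-bit accumulator form (RAX per this package's
+// constants); the encoder may also record the generic 81 /7 form and
+// keep the shorter one:
+//
+//     p.CMPQ(0x1000, RAX)    // imm32, rax -> 48 3D id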
+// +// Mnemonic : CMP +// Supported forms : (8 forms) +// +// * CMPQ imm32, rax +// * CMPQ imm8, r64 +// * CMPQ imm32, r64 +// * CMPQ r64, r64 +// * CMPQ m64, r64 +// * CMPQ imm8, m64 +// * CMPQ imm32, m64 +// * CMPQ r64, m64 +// +func (self *Program) CMPQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMPQ", 2, Operands { v0, v1 }) + // CMPQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x3d) + m.imm4(toImmAny(v[0])) + }) + } + // CMPQ imm8, r64 + if isImm8Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x83) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // CMPQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x81) + m.emit(0xf8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // CMPQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x39) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMPQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x3b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMPQ imm8, m64 + if isImm8Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x83) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // CMPQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x81) + m.mrsd(7, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // CMPQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x39) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPQ") + } + return p +} + +// CMPSD performs "Compare Scalar Double-Precision Floating-Point Values". 
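+//
+// This is the SSE2 scalar compare (not the string CMPSD); as with CMPPD
+// the imm8 picks the predicate. A sketch with assumed XMM constants:
+//
+//     p.CMPSD(0, XMM2, XMM3)    // low lane of XMM3 = mask of XMM3 == XMM2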
+// +// Mnemonic : CMPSD +// Supported forms : (2 forms) +// +// * CMPSD imm8, xmm, xmm [SSE2] +// * CMPSD imm8, m64, xmm [SSE2] +// +func (self *Program) CMPSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("CMPSD", 3, Operands { v0, v1, v2 }) + // CMPSD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // CMPSD imm8, m64, xmm + if isImm8(v0) && isM64(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc2) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for CMPSD") + } + return p +} + +// CMPSS performs "Compare Scalar Single-Precision Floating-Point Values". +// +// Mnemonic : CMPSS +// Supported forms : (2 forms) +// +// * CMPSS imm8, xmm, xmm [SSE] +// * CMPSS imm8, m32, xmm [SSE] +// +func (self *Program) CMPSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("CMPSS", 3, Operands { v0, v1, v2 }) + // CMPSS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // CMPSS imm8, m32, xmm + if isImm8(v0) && isM32(v1) && isXMM(v2) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc2) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for CMPSS") + } + return p +} + +// CMPW performs "Compare Two Operands". 
+// +// Mnemonic : CMP +// Supported forms : (8 forms) +// +// * CMPW imm16, ax +// * CMPW imm8, r16 +// * CMPW imm16, r16 +// * CMPW r16, r16 +// * CMPW m16, r16 +// * CMPW imm8, m16 +// * CMPW imm16, m16 +// * CMPW r16, m16 +// +func (self *Program) CMPW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMPW", 2, Operands { v0, v1 }) + // CMPW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x3d) + m.imm2(toImmAny(v[0])) + }) + } + // CMPW imm8, r16 + if isImm8Ext(v0, 2) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // CMPW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xf8 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // CMPW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x39) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMPW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x3b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMPW imm8, m16 + if isImm8Ext(v0, 2) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // CMPW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(7, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // CMPW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x39) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPW") + } + return p +} + +// CMPXCHG16B performs "Compare and Exchange 16 Bytes". +// +// Mnemonic : CMPXCHG16B +// Supported forms : (1 form) +// +// * CMPXCHG16B m128 +// +func (self *Program) CMPXCHG16B(v0 interface{}) *Instruction { + p := self.alloc("CMPXCHG16B", 1, Operands { v0 }) + // CMPXCHG16B m128 + if isM128(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0x0f) + m.emit(0xc7) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPXCHG16B") + } + return p +} + +// CMPXCHG8B performs "Compare and Exchange 8 Bytes". 
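+//
+// EDX:EAX holds the expected value and ECX:EBX the replacement; a sketch
+// with the Ptr constructor assumed as above:
+//
+//     p.CMPXCHG8B(Ptr(RSI, 0))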
+// +// Mnemonic : CMPXCHG8B +// Supported forms : (1 form) +// +// * CMPXCHG8B m64 +// +func (self *Program) CMPXCHG8B(v0 interface{}) *Instruction { + p := self.alloc("CMPXCHG8B", 1, Operands { v0 }) + // CMPXCHG8B m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0xc7) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPXCHG8B") + } + return p +} + +// CMPXCHGB performs "Compare and Exchange". +// +// Mnemonic : CMPXCHG +// Supported forms : (2 forms) +// +// * CMPXCHGB r8, r8 +// * CMPXCHGB r8, m8 +// +func (self *Program) CMPXCHGB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMPXCHGB", 2, Operands { v0, v1 }) + // CMPXCHGB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x0f) + m.emit(0xb0) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // CMPXCHGB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0xb0) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPXCHGB") + } + return p +} + +// CMPXCHGL performs "Compare and Exchange". +// +// Mnemonic : CMPXCHG +// Supported forms : (2 forms) +// +// * CMPXCHGL r32, r32 +// * CMPXCHGL r32, m32 +// +func (self *Program) CMPXCHGL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMPXCHGL", 2, Operands { v0, v1 }) + // CMPXCHGL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xb1) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // CMPXCHGL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xb1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPXCHGL") + } + return p +} + +// CMPXCHGQ performs "Compare and Exchange". +// +// Mnemonic : CMPXCHG +// Supported forms : (2 forms) +// +// * CMPXCHGQ r64, r64 +// * CMPXCHGQ r64, m64 +// +func (self *Program) CMPXCHGQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMPXCHGQ", 2, Operands { v0, v1 }) + // CMPXCHGQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xb1) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // CMPXCHGQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0xb1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPXCHGQ") + } + return p +} + +// CMPXCHGW performs "Compare and Exchange". 
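+//
+// AX holds the expected value: on a match the destination receives the
+// source, otherwise AX receives the destination. A sketch (CX and DX
+// assumed among the package's register constants):
+//
+//     p.CMPXCHGW(CX, DX)    // if AX == DX { DX = CX } else { AX = DX }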
+// +// Mnemonic : CMPXCHG +// Supported forms : (2 forms) +// +// * CMPXCHGW r16, r16 +// * CMPXCHGW r16, m16 +// +func (self *Program) CMPXCHGW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMPXCHGW", 2, Operands { v0, v1 }) + // CMPXCHGW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xb1) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // CMPXCHGW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xb1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPXCHGW") + } + return p +} + +// COMISD performs "Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS". +// +// Mnemonic : COMISD +// Supported forms : (2 forms) +// +// * COMISD xmm, xmm [SSE2] +// * COMISD m64, xmm [SSE2] +// +func (self *Program) COMISD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("COMISD", 2, Operands { v0, v1 }) + // COMISD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // COMISD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for COMISD") + } + return p +} + +// COMISS performs "Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS". +// +// Mnemonic : COMISS +// Supported forms : (2 forms) +// +// * COMISS xmm, xmm [SSE] +// * COMISS m32, xmm [SSE] +// +func (self *Program) COMISS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("COMISS", 2, Operands { v0, v1 }) + // COMISS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // COMISS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for COMISS") + } + return p +} + +// CPUID performs "CPU Identification". +// +// Mnemonic : CPUID +// Supported forms : (1 form) +// +// * CPUID [CPUID] +// +func (self *Program) CPUID() *Instruction { + p := self.alloc("CPUID", 0, Operands { }) + // CPUID + self.require(ISA_CPUID) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0xa2) + }) + return p +} + +// CQTO performs "Convert Quadword to Octaword". 
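+//
+// A one-line sketch (p is assumed to be a *Program built elsewhere).
+// CQTO sign-extends RAX into RDX:RAX, the usual setup before a 64-bit
+// signed divide with IDIVQ:
+//
+//     p.CQTO()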
+// +// Mnemonic : CQO +// Supported forms : (1 form) +// +// * CQTO +// +func (self *Program) CQTO() *Instruction { + p := self.alloc("CQTO", 0, Operands { }) + // CQTO + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x99) + }) + return p +} + +// CRC32B performs "Accumulate CRC32 Value". +// +// Mnemonic : CRC32 +// Supported forms : (4 forms) +// +// * CRC32B r8, r32 [SSE4.2] +// * CRC32B m8, r32 [SSE4.2] +// * CRC32B r8, r64 [SSE4.2] +// * CRC32B m8, r64 [SSE4.2] +// +func (self *Program) CRC32B(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CRC32B", 2, Operands { v0, v1 }) + // CRC32B r8, r32 + if isReg8(v0) && isReg32(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf0) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CRC32B m8, r32 + if isM8(v0) && isReg32(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CRC32B r8, r64 + if isReg8(v0) && isReg64(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf0) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CRC32B m8, r64 + if isM8(v0) && isReg64(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CRC32B") + } + return p +} + +// CRC32L performs "Accumulate CRC32 Value". +// +// Mnemonic : CRC32 +// Supported forms : (2 forms) +// +// * CRC32L r32, r32 [SSE4.2] +// * CRC32L m32, r32 [SSE4.2] +// +func (self *Program) CRC32L(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CRC32L", 2, Operands { v0, v1 }) + // CRC32L r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CRC32L m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CRC32L") + } + return p +} + +// CRC32Q performs "Accumulate CRC32 Value". 
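+//
+// A minimal sketch, assuming this package's 64-bit register constants.
+// CRC32 accumulates a CRC-32C (Castagnoli) checksum; here the eight bytes
+// in RSI are folded into the running value in RAX:
+//
+//     p.CRC32Q(RSI, RAX)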
+// +// Mnemonic : CRC32 +// Supported forms : (2 forms) +// +// * CRC32Q r64, r64 [SSE4.2] +// * CRC32Q m64, r64 [SSE4.2] +// +func (self *Program) CRC32Q(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CRC32Q", 2, Operands { v0, v1 }) + // CRC32Q r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CRC32Q m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CRC32Q") + } + return p +} + +// CRC32W performs "Accumulate CRC32 Value". +// +// Mnemonic : CRC32 +// Supported forms : (2 forms) +// +// * CRC32W r16, r32 [SSE4.2] +// * CRC32W m16, r32 [SSE4.2] +// +func (self *Program) CRC32W(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CRC32W", 2, Operands { v0, v1 }) + // CRC32W r16, r32 + if isReg16(v0) && isReg32(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CRC32W m16, r32 + if isM16(v0) && isReg32(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CRC32W") + } + return p +} + +// CVTDQ2PD performs "Convert Packed Dword Integers to Packed Double-Precision FP Values". +// +// Mnemonic : CVTDQ2PD +// Supported forms : (2 forms) +// +// * CVTDQ2PD xmm, xmm [SSE2] +// * CVTDQ2PD m64, xmm [SSE2] +// +func (self *Program) CVTDQ2PD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTDQ2PD", 2, Operands { v0, v1 }) + // CVTDQ2PD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTDQ2PD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTDQ2PD") + } + return p +} + +// CVTDQ2PS performs "Convert Packed Dword Integers to Packed Single-Precision FP Values". 
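+//
+// A minimal sketch, assuming this package's XMM register constants; it
+// converts the four packed int32 values in XMM1 to float32 lanes in XMM0:
+//
+//     p.CVTDQ2PS(XMM1, XMM0)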
+// +// Mnemonic : CVTDQ2PS +// Supported forms : (2 forms) +// +// * CVTDQ2PS xmm, xmm [SSE2] +// * CVTDQ2PS m128, xmm [SSE2] +// +func (self *Program) CVTDQ2PS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTDQ2PS", 2, Operands { v0, v1 }) + // CVTDQ2PS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTDQ2PS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTDQ2PS") + } + return p +} + +// CVTPD2DQ performs "Convert Packed Double-Precision FP Values to Packed Dword Integers". +// +// Mnemonic : CVTPD2DQ +// Supported forms : (2 forms) +// +// * CVTPD2DQ xmm, xmm [SSE2] +// * CVTPD2DQ m128, xmm [SSE2] +// +func (self *Program) CVTPD2DQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTPD2DQ", 2, Operands { v0, v1 }) + // CVTPD2DQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTPD2DQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTPD2DQ") + } + return p +} + +// CVTPD2PI performs "Convert Packed Double-Precision FP Values to Packed Dword Integers". +// +// Mnemonic : CVTPD2PI +// Supported forms : (2 forms) +// +// * CVTPD2PI xmm, mm [SSE] +// * CVTPD2PI m128, mm [SSE] +// +func (self *Program) CVTPD2PI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTPD2PI", 2, Operands { v0, v1 }) + // CVTPD2PI xmm, mm + if isXMM(v0) && isMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTPD2PI m128, mm + if isM128(v0) && isMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTPD2PI") + } + return p +} + +// CVTPD2PS performs "Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values". 
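+//
+// A minimal sketch (XMM constants assumed from this package); the two
+// float64 values in XMM1 are narrowed to float32 in the low half of XMM0:
+//
+//     p.CVTPD2PS(XMM1, XMM0)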
+// +// Mnemonic : CVTPD2PS +// Supported forms : (2 forms) +// +// * CVTPD2PS xmm, xmm [SSE2] +// * CVTPD2PS m128, xmm [SSE2] +// +func (self *Program) CVTPD2PS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTPD2PS", 2, Operands { v0, v1 }) + // CVTPD2PS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTPD2PS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTPD2PS") + } + return p +} + +// CVTPI2PD performs "Convert Packed Dword Integers to Packed Double-Precision FP Values". +// +// Mnemonic : CVTPI2PD +// Supported forms : (2 forms) +// +// * CVTPI2PD mm, xmm [SSE2] +// * CVTPI2PD m64, xmm [SSE2] +// +func (self *Program) CVTPI2PD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTPI2PD", 2, Operands { v0, v1 }) + // CVTPI2PD mm, xmm + if isMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTPI2PD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTPI2PD") + } + return p +} + +// CVTPI2PS performs "Convert Packed Dword Integers to Packed Single-Precision FP Values". +// +// Mnemonic : CVTPI2PS +// Supported forms : (2 forms) +// +// * CVTPI2PS mm, xmm [SSE] +// * CVTPI2PS m64, xmm [SSE] +// +func (self *Program) CVTPI2PS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTPI2PS", 2, Operands { v0, v1 }) + // CVTPI2PS mm, xmm + if isMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTPI2PS m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTPI2PS") + } + return p +} + +// CVTPS2DQ performs "Convert Packed Single-Precision FP Values to Packed Dword Integers". 
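+//
+// A minimal sketch (XMM constants assumed); the four float32 values in
+// XMM1 are rounded to int32 in XMM0 under the current MXCSR rounding mode:
+//
+//     p.CVTPS2DQ(XMM1, XMM0)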
+// +// Mnemonic : CVTPS2DQ +// Supported forms : (2 forms) +// +// * CVTPS2DQ xmm, xmm [SSE2] +// * CVTPS2DQ m128, xmm [SSE2] +// +func (self *Program) CVTPS2DQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTPS2DQ", 2, Operands { v0, v1 }) + // CVTPS2DQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTPS2DQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTPS2DQ") + } + return p +} + +// CVTPS2PD performs "Convert Packed Single-Precision FP Values to Packed Double-Precision FP Values". +// +// Mnemonic : CVTPS2PD +// Supported forms : (2 forms) +// +// * CVTPS2PD xmm, xmm [SSE2] +// * CVTPS2PD m64, xmm [SSE2] +// +func (self *Program) CVTPS2PD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTPS2PD", 2, Operands { v0, v1 }) + // CVTPS2PD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTPS2PD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTPS2PD") + } + return p +} + +// CVTPS2PI performs "Convert Packed Single-Precision FP Values to Packed Dword Integers". +// +// Mnemonic : CVTPS2PI +// Supported forms : (2 forms) +// +// * CVTPS2PI xmm, mm [SSE] +// * CVTPS2PI m64, mm [SSE] +// +func (self *Program) CVTPS2PI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTPS2PI", 2, Operands { v0, v1 }) + // CVTPS2PI xmm, mm + if isXMM(v0) && isMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTPS2PI m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTPS2PI") + } + return p +} + +// CVTSD2SI performs "Convert Scalar Double-Precision FP Value to Integer". 
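+//
+// A minimal sketch, assuming this package's XMM and 64-bit register
+// constants; the scalar float64 in XMM0 is rounded (per MXCSR) into RAX:
+//
+//     p.CVTSD2SI(XMM0, RAX)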
+// +// Mnemonic : CVTSD2SI +// Supported forms : (4 forms) +// +// * CVTSD2SI xmm, r32 [SSE2] +// * CVTSD2SI m64, r32 [SSE2] +// * CVTSD2SI xmm, r64 [SSE2] +// * CVTSD2SI m64, r64 [SSE2] +// +func (self *Program) CVTSD2SI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTSD2SI", 2, Operands { v0, v1 }) + // CVTSD2SI xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSD2SI m64, r32 + if isM64(v0) && isReg32(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CVTSD2SI xmm, r64 + if isXMM(v0) && isReg64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSD2SI m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTSD2SI") + } + return p +} + +// CVTSD2SS performs "Convert Scalar Double-Precision FP Value to Scalar Single-Precision FP Value". +// +// Mnemonic : CVTSD2SS +// Supported forms : (2 forms) +// +// * CVTSD2SS xmm, xmm [SSE2] +// * CVTSD2SS m64, xmm [SSE2] +// +func (self *Program) CVTSD2SS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTSD2SS", 2, Operands { v0, v1 }) + // CVTSD2SS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSD2SS m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTSD2SS") + } + return p +} + +// CVTSI2SD performs "Convert Dword Integer to Scalar Double-Precision FP Value". 
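+//
+// A minimal sketch (register constants assumed); the signed integer in
+// RAX becomes a float64 in the low lane of XMM0:
+//
+//     p.CVTSI2SD(RAX, XMM0)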
+// +// Mnemonic : CVTSI2SD +// Supported forms : (4 forms) +// +// * CVTSI2SD r32, xmm [SSE2] +// * CVTSI2SD r64, xmm [SSE2] +// * CVTSI2SD m32, xmm [SSE2] +// * CVTSI2SD m64, xmm [SSE2] +// +func (self *Program) CVTSI2SD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTSI2SD", 2, Operands { v0, v1 }) + // CVTSI2SD r32, xmm + if isReg32(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSI2SD r64, xmm + if isReg64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSI2SD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CVTSI2SD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTSI2SD") + } + return p +} + +// CVTSI2SS performs "Convert Dword Integer to Scalar Single-Precision FP Value". +// +// Mnemonic : CVTSI2SS +// Supported forms : (4 forms) +// +// * CVTSI2SS r32, xmm [SSE] +// * CVTSI2SS r64, xmm [SSE] +// * CVTSI2SS m32, xmm [SSE] +// * CVTSI2SS m64, xmm [SSE] +// +func (self *Program) CVTSI2SS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTSI2SS", 2, Operands { v0, v1 }) + // CVTSI2SS r32, xmm + if isReg32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSI2SS r64, xmm + if isReg64(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSI2SS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CVTSI2SS m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTSI2SS") + } + return p +} + +// CVTSS2SD performs "Convert Scalar Single-Precision FP Value to Scalar Double-Precision FP Value". 
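+//
+// A minimal sketch (XMM constants assumed); the scalar float32 in XMM1
+// is widened, exactly, to a float64 in the low lane of XMM0:
+//
+//     p.CVTSS2SD(XMM1, XMM0)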
+// +// Mnemonic : CVTSS2SD +// Supported forms : (2 forms) +// +// * CVTSS2SD xmm, xmm [SSE2] +// * CVTSS2SD m32, xmm [SSE2] +// +func (self *Program) CVTSS2SD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTSS2SD", 2, Operands { v0, v1 }) + // CVTSS2SD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSS2SD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTSS2SD") + } + return p +} + +// CVTSS2SI performs "Convert Scalar Single-Precision FP Value to Dword Integer". +// +// Mnemonic : CVTSS2SI +// Supported forms : (4 forms) +// +// * CVTSS2SI xmm, r32 [SSE] +// * CVTSS2SI m32, r32 [SSE] +// * CVTSS2SI xmm, r64 [SSE] +// * CVTSS2SI m32, r64 [SSE] +// +func (self *Program) CVTSS2SI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTSS2SI", 2, Operands { v0, v1 }) + // CVTSS2SI xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSS2SI m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CVTSS2SI xmm, r64 + if isXMM(v0) && isReg64(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSS2SI m32, r64 + if isM32(v0) && isReg64(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTSS2SI") + } + return p +} + +// CVTTPD2DQ performs "Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers". 
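+//
+// A minimal sketch (XMM constants assumed); unlike CVTPD2DQ, the "T"
+// variant truncates toward zero instead of using the MXCSR rounding mode:
+//
+//     p.CVTTPD2DQ(XMM1, XMM0)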
+// +// Mnemonic : CVTTPD2DQ +// Supported forms : (2 forms) +// +// * CVTTPD2DQ xmm, xmm [SSE2] +// * CVTTPD2DQ m128, xmm [SSE2] +// +func (self *Program) CVTTPD2DQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTTPD2DQ", 2, Operands { v0, v1 }) + // CVTTPD2DQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTTPD2DQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTTPD2DQ") + } + return p +} + +// CVTTPD2PI performs "Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers". +// +// Mnemonic : CVTTPD2PI +// Supported forms : (2 forms) +// +// * CVTTPD2PI xmm, mm [SSE2] +// * CVTTPD2PI m128, mm [SSE2] +// +func (self *Program) CVTTPD2PI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTTPD2PI", 2, Operands { v0, v1 }) + // CVTTPD2PI xmm, mm + if isXMM(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTTPD2PI m128, mm + if isM128(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTTPD2PI") + } + return p +} + +// CVTTPS2DQ performs "Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers". +// +// Mnemonic : CVTTPS2DQ +// Supported forms : (2 forms) +// +// * CVTTPS2DQ xmm, xmm [SSE2] +// * CVTTPS2DQ m128, xmm [SSE2] +// +func (self *Program) CVTTPS2DQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTTPS2DQ", 2, Operands { v0, v1 }) + // CVTTPS2DQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTTPS2DQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTTPS2DQ") + } + return p +} + +// CVTTPS2PI performs "Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers". 
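+//
+// A minimal sketch, assuming this package's XMM and MMX register
+// constants; the low two float32 lanes of XMM0 are truncated into MM0:
+//
+//     p.CVTTPS2PI(XMM0, MM0)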
+// +// Mnemonic : CVTTPS2PI +// Supported forms : (2 forms) +// +// * CVTTPS2PI xmm, mm [SSE] +// * CVTTPS2PI m64, mm [SSE] +// +func (self *Program) CVTTPS2PI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTTPS2PI", 2, Operands { v0, v1 }) + // CVTTPS2PI xmm, mm + if isXMM(v0) && isMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTTPS2PI m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTTPS2PI") + } + return p +} + +// CVTTSD2SI performs "Convert with Truncation Scalar Double-Precision FP Value to Signed Integer". +// +// Mnemonic : CVTTSD2SI +// Supported forms : (4 forms) +// +// * CVTTSD2SI xmm, r32 [SSE2] +// * CVTTSD2SI m64, r32 [SSE2] +// * CVTTSD2SI xmm, r64 [SSE2] +// * CVTTSD2SI m64, r64 [SSE2] +// +func (self *Program) CVTTSD2SI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTTSD2SI", 2, Operands { v0, v1 }) + // CVTTSD2SI xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTTSD2SI m64, r32 + if isM64(v0) && isReg32(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CVTTSD2SI xmm, r64 + if isXMM(v0) && isReg64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTTSD2SI m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTTSD2SI") + } + return p +} + +// CVTTSS2SI performs "Convert with Truncation Scalar Single-Precision FP Value to Dword Integer". 
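+//
+// A minimal sketch (register constants assumed); this is the truncating
+// float32-to-integer conversion, i.e. it always rounds toward zero:
+//
+//     p.CVTTSS2SI(XMM0, EAX)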
+// +// Mnemonic : CVTTSS2SI +// Supported forms : (4 forms) +// +// * CVTTSS2SI xmm, r32 [SSE] +// * CVTTSS2SI m32, r32 [SSE] +// * CVTTSS2SI xmm, r64 [SSE] +// * CVTTSS2SI m32, r64 [SSE] +// +func (self *Program) CVTTSS2SI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTTSS2SI", 2, Operands { v0, v1 }) + // CVTTSS2SI xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTTSS2SI m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CVTTSS2SI xmm, r64 + if isXMM(v0) && isReg64(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTTSS2SI m32, r64 + if isM32(v0) && isReg64(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTTSS2SI") + } + return p +} + +// CWTD performs "Convert Word to Doubleword". +// +// Mnemonic : CWD +// Supported forms : (1 form) +// +// * CWTD +// +func (self *Program) CWTD() *Instruction { + p := self.alloc("CWTD", 0, Operands { }) + // CWTD + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x99) + }) + return p +} + +// CWTL performs "Convert Word to Doubleword". +// +// Mnemonic : CWDE +// Supported forms : (1 form) +// +// * CWTL +// +func (self *Program) CWTL() *Instruction { + p := self.alloc("CWTL", 0, Operands { }) + // CWTL + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x98) + }) + return p +} + +// DECB performs "Decrement by 1". +// +// Mnemonic : DEC +// Supported forms : (2 forms) +// +// * DECB r8 +// * DECB m8 +// +func (self *Program) DECB(v0 interface{}) *Instruction { + p := self.alloc("DECB", 1, Operands { v0 }) + // DECB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0xfe) + m.emit(0xc8 | lcode(v[0])) + }) + } + // DECB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xfe) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DECB") + } + return p +} + +// DECL performs "Decrement by 1". 
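+//
+// A minimal sketch (EAX assumed from this package's register constants);
+// note that DEC updates the arithmetic flags but leaves CF unchanged:
+//
+//     p.DECL(EAX)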
+// +// Mnemonic : DEC +// Supported forms : (2 forms) +// +// * DECL r32 +// * DECL m32 +// +func (self *Program) DECL(v0 interface{}) *Instruction { + p := self.alloc("DECL", 1, Operands { v0 }) + // DECL r32 + if isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xff) + m.emit(0xc8 | lcode(v[0])) + }) + } + // DECL m32 + if isM32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xff) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DECL") + } + return p +} + +// DECQ performs "Decrement by 1". +// +// Mnemonic : DEC +// Supported forms : (2 forms) +// +// * DECQ r64 +// * DECQ m64 +// +func (self *Program) DECQ(v0 interface{}) *Instruction { + p := self.alloc("DECQ", 1, Operands { v0 }) + // DECQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0xff) + m.emit(0xc8 | lcode(v[0])) + }) + } + // DECQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0xff) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DECQ") + } + return p +} + +// DECW performs "Decrement by 1". +// +// Mnemonic : DEC +// Supported forms : (2 forms) +// +// * DECW r16 +// * DECW m16 +// +func (self *Program) DECW(v0 interface{}) *Instruction { + p := self.alloc("DECW", 1, Operands { v0 }) + // DECW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xff) + m.emit(0xc8 | lcode(v[0])) + }) + } + // DECW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xff) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DECW") + } + return p +} + +// DIVB performs "Unsigned Divide". +// +// Mnemonic : DIV +// Supported forms : (2 forms) +// +// * DIVB r8 +// * DIVB m8 +// +func (self *Program) DIVB(v0 interface{}) *Instruction { + p := self.alloc("DIVB", 1, Operands { v0 }) + // DIVB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0xf6) + m.emit(0xf0 | lcode(v[0])) + }) + } + // DIVB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf6) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DIVB") + } + return p +} + +// DIVL performs "Unsigned Divide". +// +// Mnemonic : DIV +// Supported forms : (2 forms) +// +// * DIVL r32 +// * DIVL m32 +// +func (self *Program) DIVL(v0 interface{}) *Instruction { + p := self.alloc("DIVL", 1, Operands { v0 }) + // DIVL r32 + if isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xf0 | lcode(v[0])) + }) + } + // DIVL m32 + if isM32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DIVL") + } + return p +} + +// DIVPD performs "Divide Packed Double-Precision Floating-Point Values". 
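+//
+// A minimal sketch (XMM constants assumed); each float64 lane of XMM0
+// is divided by the corresponding lane of XMM1:
+//
+//     p.DIVPD(XMM1, XMM0)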
+// +// Mnemonic : DIVPD +// Supported forms : (2 forms) +// +// * DIVPD xmm, xmm [SSE2] +// * DIVPD m128, xmm [SSE2] +// +func (self *Program) DIVPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("DIVPD", 2, Operands { v0, v1 }) + // DIVPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // DIVPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DIVPD") + } + return p +} + +// DIVPS performs "Divide Packed Single-Precision Floating-Point Values". +// +// Mnemonic : DIVPS +// Supported forms : (2 forms) +// +// * DIVPS xmm, xmm [SSE] +// * DIVPS m128, xmm [SSE] +// +func (self *Program) DIVPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("DIVPS", 2, Operands { v0, v1 }) + // DIVPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // DIVPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DIVPS") + } + return p +} + +// DIVQ performs "Unsigned Divide". +// +// Mnemonic : DIV +// Supported forms : (2 forms) +// +// * DIVQ r64 +// * DIVQ m64 +// +func (self *Program) DIVQ(v0 interface{}) *Instruction { + p := self.alloc("DIVQ", 1, Operands { v0 }) + // DIVQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0xf7) + m.emit(0xf0 | lcode(v[0])) + }) + } + // DIVQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0xf7) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DIVQ") + } + return p +} + +// DIVSD performs "Divide Scalar Double-Precision Floating-Point Values". 
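+//
+// A minimal sketch (XMM constants assumed); only the low float64 lane of
+// XMM0 is divided by the low lane of XMM1; the upper lane is preserved:
+//
+//     p.DIVSD(XMM1, XMM0)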
+// +// Mnemonic : DIVSD +// Supported forms : (2 forms) +// +// * DIVSD xmm, xmm [SSE2] +// * DIVSD m64, xmm [SSE2] +// +func (self *Program) DIVSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("DIVSD", 2, Operands { v0, v1 }) + // DIVSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // DIVSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DIVSD") + } + return p +} + +// DIVSS performs "Divide Scalar Single-Precision Floating-Point Values". +// +// Mnemonic : DIVSS +// Supported forms : (2 forms) +// +// * DIVSS xmm, xmm [SSE] +// * DIVSS m32, xmm [SSE] +// +func (self *Program) DIVSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("DIVSS", 2, Operands { v0, v1 }) + // DIVSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // DIVSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DIVSS") + } + return p +} + +// DIVW performs "Unsigned Divide". +// +// Mnemonic : DIV +// Supported forms : (2 forms) +// +// * DIVW r16 +// * DIVW m16 +// +func (self *Program) DIVW(v0 interface{}) *Instruction { + p := self.alloc("DIVW", 1, Operands { v0 }) + // DIVW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xf0 | lcode(v[0])) + }) + } + // DIVW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DIVW") + } + return p +} + +// DPPD performs "Dot Product of Packed Double Precision Floating-Point Values". 
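+//
+// A minimal sketch (XMM constants assumed); the imm8 masks which lanes
+// enter the dot product and which destination lanes receive the sum,
+// with 0x31 being the common "both inputs, low lane out" choice:
+//
+//     p.DPPD(0x31, XMM1, XMM0)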
+// +// Mnemonic : DPPD +// Supported forms : (2 forms) +// +// * DPPD imm8, xmm, xmm [SSE4.1] +// * DPPD imm8, m128, xmm [SSE4.1] +// +func (self *Program) DPPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("DPPD", 3, Operands { v0, v1, v2 }) + // DPPD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x41) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // DPPD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x41) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for DPPD") + } + return p +} + +// DPPS performs "Dot Product of Packed Single Precision Floating-Point Values". +// +// Mnemonic : DPPS +// Supported forms : (2 forms) +// +// * DPPS imm8, xmm, xmm [SSE4.1] +// * DPPS imm8, m128, xmm [SSE4.1] +// +func (self *Program) DPPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("DPPS", 3, Operands { v0, v1, v2 }) + // DPPS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // DPPS imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for DPPS") + } + return p +} + +// EMMS performs "Exit MMX State". +// +// Mnemonic : EMMS +// Supported forms : (1 form) +// +// * EMMS [MMX] +// +func (self *Program) EMMS() *Instruction { + p := self.alloc("EMMS", 0, Operands { }) + // EMMS + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x77) + }) + return p +} + +// EXTRACTPS performs "Extract Packed Single Precision Floating-Point Value". 
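+//
+// A minimal sketch (register constants assumed); the low two bits of the
+// imm8 pick the source lane, here lane 2 of XMM0 into EAX:
+//
+//     p.EXTRACTPS(2, XMM0, EAX)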
+// +// Mnemonic : EXTRACTPS +// Supported forms : (2 forms) +// +// * EXTRACTPS imm8, xmm, r32 [SSE4.1] +// * EXTRACTPS imm8, xmm, m32 [SSE4.1] +// +func (self *Program) EXTRACTPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("EXTRACTPS", 3, Operands { v0, v1, v2 }) + // EXTRACTPS imm8, xmm, r32 + if isImm8(v0) && isXMM(v1) && isReg32(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x17) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // EXTRACTPS imm8, xmm, m32 + if isImm8(v0) && isXMM(v1) && isM32(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x17) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for EXTRACTPS") + } + return p +} + +// EXTRQ performs "Extract Field". +// +// Mnemonic : EXTRQ +// Supported forms : (2 forms) +// +// * EXTRQ xmm, xmm [SSE4A] +// * EXTRQ imm8, imm8, xmm [SSE4A] +// +func (self *Program) EXTRQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("EXTRQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("EXTRQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction EXTRQ takes 2 or 3 operands") + } + // EXTRQ xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4A) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // EXTRQ imm8, imm8, xmm + if len(vv) == 1 && isImm8(v0) && isImm8(v1) && isXMM(vv[0]) { + self.require(ISA_SSE4A) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[2], false) + m.emit(0x0f) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2])) + m.imm1(toImmAny(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for EXTRQ") + } + return p +} + +// FEMMS performs "Fast Exit Multimedia State". +// +// Mnemonic : FEMMS +// Supported forms : (1 form) +// +// * FEMMS [FEMMS] +// +func (self *Program) FEMMS() *Instruction { + p := self.alloc("FEMMS", 0, Operands { }) + // FEMMS + self.require(ISA_FEMMS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x0e) + }) + return p +} + +// HADDPD performs "Packed Double-FP Horizontal Add". 
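+//
+// A minimal sketch (XMM constants assumed); each operand's two float64
+// lanes are summed, yielding one result lane per source operand:
+//
+//     p.HADDPD(XMM1, XMM0)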
+// +// Mnemonic : HADDPD +// Supported forms : (2 forms) +// +// * HADDPD xmm, xmm [SSE3] +// * HADDPD m128, xmm [SSE3] +// +func (self *Program) HADDPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("HADDPD", 2, Operands { v0, v1 }) + // HADDPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // HADDPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x7c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for HADDPD") + } + return p +} + +// HADDPS performs "Packed Single-FP Horizontal Add". +// +// Mnemonic : HADDPS +// Supported forms : (2 forms) +// +// * HADDPS xmm, xmm [SSE3] +// * HADDPS m128, xmm [SSE3] +// +func (self *Program) HADDPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("HADDPS", 2, Operands { v0, v1 }) + // HADDPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // HADDPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x7c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for HADDPS") + } + return p +} + +// HSUBPD performs "Packed Double-FP Horizontal Subtract". +// +// Mnemonic : HSUBPD +// Supported forms : (2 forms) +// +// * HSUBPD xmm, xmm [SSE3] +// * HSUBPD m128, xmm [SSE3] +// +func (self *Program) HSUBPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("HSUBPD", 2, Operands { v0, v1 }) + // HSUBPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // HSUBPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x7d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for HSUBPD") + } + return p +} + +// HSUBPS performs "Packed Single-FP Horizontal Subtract". 
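+//
+// A minimal sketch (XMM constants assumed); adjacent float32 pairs are
+// subtracted within each operand rather than across the two operands:
+//
+//     p.HSUBPS(XMM1, XMM0)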
+// +// Mnemonic : HSUBPS +// Supported forms : (2 forms) +// +// * HSUBPS xmm, xmm [SSE3] +// * HSUBPS m128, xmm [SSE3] +// +func (self *Program) HSUBPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("HSUBPS", 2, Operands { v0, v1 }) + // HSUBPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // HSUBPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x7d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for HSUBPS") + } + return p +} + +// IDIVB performs "Signed Divide". +// +// Mnemonic : IDIV +// Supported forms : (2 forms) +// +// * IDIVB r8 +// * IDIVB m8 +// +func (self *Program) IDIVB(v0 interface{}) *Instruction { + p := self.alloc("IDIVB", 1, Operands { v0 }) + // IDIVB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0xf6) + m.emit(0xf8 | lcode(v[0])) + }) + } + // IDIVB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf6) + m.mrsd(7, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for IDIVB") + } + return p +} + +// IDIVL performs "Signed Divide". +// +// Mnemonic : IDIV +// Supported forms : (2 forms) +// +// * IDIVL r32 +// * IDIVL m32 +// +func (self *Program) IDIVL(v0 interface{}) *Instruction { + p := self.alloc("IDIVL", 1, Operands { v0 }) + // IDIVL r32 + if isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xf8 | lcode(v[0])) + }) + } + // IDIVL m32 + if isM32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(7, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for IDIVL") + } + return p +} + +// IDIVQ performs "Signed Divide". +// +// Mnemonic : IDIV +// Supported forms : (2 forms) +// +// * IDIVQ r64 +// * IDIVQ m64 +// +func (self *Program) IDIVQ(v0 interface{}) *Instruction { + p := self.alloc("IDIVQ", 1, Operands { v0 }) + // IDIVQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0xf7) + m.emit(0xf8 | lcode(v[0])) + }) + } + // IDIVQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0xf7) + m.mrsd(7, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for IDIVQ") + } + return p +} + +// IDIVW performs "Signed Divide". 
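+//
+// A minimal sketch (CX assumed from this package's register constants);
+// IDIVW divides DX:AX by the operand, leaving the quotient in AX and the
+// remainder in DX:
+//
+//     p.IDIVW(CX)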
+// +// Mnemonic : IDIV +// Supported forms : (2 forms) +// +// * IDIVW r16 +// * IDIVW m16 +// +func (self *Program) IDIVW(v0 interface{}) *Instruction { + p := self.alloc("IDIVW", 1, Operands { v0 }) + // IDIVW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xf8 | lcode(v[0])) + }) + } + // IDIVW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(7, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for IDIVW") + } + return p +} + +// IMULB performs "Signed Multiply". +// +// Mnemonic : IMUL +// Supported forms : (2 forms) +// +// * IMULB r8 +// * IMULB m8 +// +func (self *Program) IMULB(v0 interface{}) *Instruction { + p := self.alloc("IMULB", 1, Operands { v0 }) + // IMULB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0xf6) + m.emit(0xe8 | lcode(v[0])) + }) + } + // IMULB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf6) + m.mrsd(5, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for IMULB") + } + return p +} + +// IMULL performs "Signed Multiply". +// +// Mnemonic : IMUL +// Supported forms : (8 forms) +// +// * IMULL r32 +// * IMULL m32 +// * IMULL r32, r32 +// * IMULL m32, r32 +// * IMULL imm8, r32, r32 +// * IMULL imm32, r32, r32 +// * IMULL imm8, m32, r32 +// * IMULL imm32, m32, r32 +// +func (self *Program) IMULL(v0 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("IMULL", 1, Operands { v0 }) + case 1 : p = self.alloc("IMULL", 2, Operands { v0, vv[0] }) + case 2 : p = self.alloc("IMULL", 3, Operands { v0, vv[0], vv[1] }) + default : panic("instruction IMULL takes 1 or 2 or 3 operands") + } + // IMULL r32 + if len(vv) == 0 && isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xe8 | lcode(v[0])) + }) + } + // IMULL m32 + if len(vv) == 0 && isM32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(5, addr(v[0]), 1) + }) + } + // IMULL r32, r32 + if len(vv) == 1 && isReg32(v0) && isReg32(vv[0]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // IMULL m32, r32 + if len(vv) == 1 && isM32(v0) && isReg32(vv[0]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xaf) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // IMULL imm8, r32, r32 + if len(vv) == 2 && isImm8(v0) && isReg32(vv[0]) && isReg32(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // IMULL imm32, r32, r32 + if len(vv) == 2 && isImm32(v0) && isReg32(vv[0]) && isReg32(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x69) + m.emit(0xc0 | lcode(v[2]) << 3 | 
lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // IMULL imm8, m32, r32 + if len(vv) == 2 && isImm8(v0) && isM32(vv[0]) && isReg32(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x6b) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // IMULL imm32, m32, r32 + if len(vv) == 2 && isImm32(v0) && isM32(vv[0]) && isReg32(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x69) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for IMULL") + } + return p +} + +// IMULQ performs "Signed Multiply". +// +// Mnemonic : IMUL +// Supported forms : (8 forms) +// +// * IMULQ r64 +// * IMULQ m64 +// * IMULQ r64, r64 +// * IMULQ m64, r64 +// * IMULQ imm8, r64, r64 +// * IMULQ imm32, r64, r64 +// * IMULQ imm8, m64, r64 +// * IMULQ imm32, m64, r64 +// +func (self *Program) IMULQ(v0 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("IMULQ", 1, Operands { v0 }) + case 1 : p = self.alloc("IMULQ", 2, Operands { v0, vv[0] }) + case 2 : p = self.alloc("IMULQ", 3, Operands { v0, vv[0], vv[1] }) + default : panic("instruction IMULQ takes 1 or 2 or 3 operands") + } + // IMULQ r64 + if len(vv) == 0 && isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0xf7) + m.emit(0xe8 | lcode(v[0])) + }) + } + // IMULQ m64 + if len(vv) == 0 && isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0xf7) + m.mrsd(5, addr(v[0]), 1) + }) + } + // IMULQ r64, r64 + if len(vv) == 1 && isReg64(v0) && isReg64(vv[0]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // IMULQ m64, r64 + if len(vv) == 1 && isM64(v0) && isReg64(vv[0]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xaf) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // IMULQ imm8, r64, r64 + if len(vv) == 2 && isImm8(v0) && isReg64(vv[0]) && isReg64(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[2]) << 2 | hcode(v[1])) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // IMULQ imm32, r64, r64 + if len(vv) == 2 && isImm32(v0) && isReg64(vv[0]) && isReg64(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[2]) << 2 | hcode(v[1])) + m.emit(0x69) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // IMULQ imm8, m64, r64 + if len(vv) == 2 && isImm8(v0) && isM64(vv[0]) && isReg64(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[2]), addr(v[1])) + m.emit(0x6b) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // IMULQ imm32, m64, r64 + if len(vv) == 2 && isImm32(v0) && isM64(vv[0]) && isReg64(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[2]), addr(v[1])) + m.emit(0x69) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + 
} + if p.len == 0 { + panic("invalid operands for IMULQ") + } + return p +} + +// IMULW performs "Signed Multiply". +// +// Mnemonic : IMUL +// Supported forms : (8 forms) +// +// * IMULW r16 +// * IMULW m16 +// * IMULW r16, r16 +// * IMULW m16, r16 +// * IMULW imm8, r16, r16 +// * IMULW imm16, r16, r16 +// * IMULW imm8, m16, r16 +// * IMULW imm16, m16, r16 +// +func (self *Program) IMULW(v0 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("IMULW", 1, Operands { v0 }) + case 1 : p = self.alloc("IMULW", 2, Operands { v0, vv[0] }) + case 2 : p = self.alloc("IMULW", 3, Operands { v0, vv[0], vv[1] }) + default : panic("instruction IMULW takes 1 or 2 or 3 operands") + } + // IMULW r16 + if len(vv) == 0 && isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xe8 | lcode(v[0])) + }) + } + // IMULW m16 + if len(vv) == 0 && isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(5, addr(v[0]), 1) + }) + } + // IMULW r16, r16 + if len(vv) == 1 && isReg16(v0) && isReg16(vv[0]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // IMULW m16, r16 + if len(vv) == 1 && isM16(v0) && isReg16(vv[0]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xaf) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // IMULW imm8, r16, r16 + if len(vv) == 2 && isImm8(v0) && isReg16(vv[0]) && isReg16(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // IMULW imm16, r16, r16 + if len(vv) == 2 && isImm16(v0) && isReg16(vv[0]) && isReg16(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x69) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // IMULW imm8, m16, r16 + if len(vv) == 2 && isImm8(v0) && isM16(vv[0]) && isReg16(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x6b) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // IMULW imm16, m16, r16 + if len(vv) == 2 && isImm16(v0) && isM16(vv[0]) && isReg16(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x69) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for IMULW") + } + return p +} + +// INCB performs "Increment by 1". 
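+// In 64-bit mode INC has no single-byte register form: the legacy 0x40..0x4F opcodes were repurposed as REX prefixes, so these encoders always use the FE /0 (8-bit) and FF /0 (16/32/64-bit) ModRM forms. A sketch (Ptr as this package's memory-operand helper is an assumption): +// +//     p.INCQ(RAX)          // REX.W + FF /0 +//     p.INCL(Ptr(RDI, 8))  // memory form, FF /0 with a ModRM/SIB address +//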
+// +// Mnemonic : INC +// Supported forms : (2 forms) +// +// * INCB r8 +// * INCB m8 +// +func (self *Program) INCB(v0 interface{}) *Instruction { + p := self.alloc("INCB", 1, Operands { v0 }) + // INCB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0xfe) + m.emit(0xc0 | lcode(v[0])) + }) + } + // INCB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xfe) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for INCB") + } + return p +} + +// INCL performs "Increment by 1". +// +// Mnemonic : INC +// Supported forms : (2 forms) +// +// * INCL r32 +// * INCL m32 +// +func (self *Program) INCL(v0 interface{}) *Instruction { + p := self.alloc("INCL", 1, Operands { v0 }) + // INCL r32 + if isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xff) + m.emit(0xc0 | lcode(v[0])) + }) + } + // INCL m32 + if isM32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xff) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for INCL") + } + return p +} + +// INCQ performs "Increment by 1". +// +// Mnemonic : INC +// Supported forms : (2 forms) +// +// * INCQ r64 +// * INCQ m64 +// +func (self *Program) INCQ(v0 interface{}) *Instruction { + p := self.alloc("INCQ", 1, Operands { v0 }) + // INCQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0xff) + m.emit(0xc0 | lcode(v[0])) + }) + } + // INCQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0xff) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for INCQ") + } + return p +} + +// INCW performs "Increment by 1". +// +// Mnemonic : INC +// Supported forms : (2 forms) +// +// * INCW r16 +// * INCW m16 +// +func (self *Program) INCW(v0 interface{}) *Instruction { + p := self.alloc("INCW", 1, Operands { v0 }) + // INCW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xff) + m.emit(0xc0 | lcode(v[0])) + }) + } + // INCW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xff) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for INCW") + } + return p +} + +// INSERTPS performs "Insert Packed Single Precision Floating-Point Value". 
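+// The imm8 control byte packs three fields: bits 7:6 select the source dword (register form only), bits 5:4 select the destination dword, and bits 3:0 are a zero mask applied after insertion. A sketch, assuming plain Go integers are accepted as imm8 operands: +// +//     p.INSERTPS(0x10, XMM1, XMM0)   // XMM0[1] = XMM1[0], nothing zeroed +//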
+// +// Mnemonic : INSERTPS +// Supported forms : (2 forms) +// +// * INSERTPS imm8, xmm, xmm [SSE4.1] +// * INSERTPS imm8, m32, xmm [SSE4.1] +// +func (self *Program) INSERTPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("INSERTPS", 3, Operands { v0, v1, v2 }) + // INSERTPS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x21) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // INSERTPS imm8, m32, xmm + if isImm8(v0) && isM32(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x21) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for INSERTPS") + } + return p +} + +// INSERTQ performs "Insert Field". +// +// Mnemonic : INSERTQ +// Supported forms : (2 forms) +// +// * INSERTQ xmm, xmm [SSE4A] +// * INSERTQ imm8, imm8, xmm, xmm [SSE4A] +// +func (self *Program) INSERTQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("INSERTQ", 2, Operands { v0, v1 }) + case 2 : p = self.alloc("INSERTQ", 4, Operands { v0, v1, vv[0], vv[1] }) + default : panic("instruction INSERTQ takes 2 or 4 operands") + } + // INSERTQ xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4A) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // INSERTQ imm8, imm8, xmm, xmm + if len(vv) == 2 && isImm8(v0) && isImm8(v1) && isXMM(vv[0]) && isXMM(vv[1]) { + self.require(ISA_SSE4A) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[3]), v[2], false) + m.emit(0x0f) + m.emit(0x78) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for INSERTQ") + } + return p +} + +// INT performs "Call to Interrupt Procedure". +// +// Mnemonic : INT +// Supported forms : (2 forms) +// +// * INT 3 +// * INT imm8 +// +func (self *Program) INT(v0 interface{}) *Instruction { + p := self.alloc("INT", 1, Operands { v0 }) + // INT 3 + if isConst3(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xcc) + }) + } + // INT imm8 + if isImm8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xcd) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for INT") + } + return p +} + +// JA performs "Jump if above (CF == 0 and ZF == 0)". 
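+// Like every Jcc encoder below, JA accepts a concrete rel8/rel32 displacement or a label; the label form registers both the _F_rel1 and _F_rel4 encodings so the assembler can later pick the shortest branch that still reaches the target. A sketch, assuming this package's CreateLabel/Link label API: +// +//     done := CreateLabel("done") +//     p.CMPL(10, EAX)   // hypothetical compare +//     p.JA(done)        // 0x77 rel8 if it fits, otherwise 0F 87 rel32 +//     p.Link(done)      // bind the label at this point +//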
+// +// Mnemonic : JA +// Supported forms : (2 forms) +// +// * JA rel8 +// * JA rel32 +// +func (self *Program) JA(v0 interface{}) *Instruction { + p := self.alloc("JA", 1, Operands { v0 }) + p.branch = _B_conditional + // JA rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x77) + m.imm1(relv(v[0])) + }) + } + // JA rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x87) + m.imm4(relv(v[0])) + }) + } + // JA label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x77) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x87) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JA") + } + return p +} + +// JAE performs "Jump if above or equal (CF == 0)". +// +// Mnemonic : JAE +// Supported forms : (2 forms) +// +// * JAE rel8 +// * JAE rel32 +// +func (self *Program) JAE(v0 interface{}) *Instruction { + p := self.alloc("JAE", 1, Operands { v0 }) + p.branch = _B_conditional + // JAE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x73) + m.imm1(relv(v[0])) + }) + } + // JAE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x83) + m.imm4(relv(v[0])) + }) + } + // JAE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x73) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x83) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JAE") + } + return p +} + +// JB performs "Jump if below (CF == 1)". +// +// Mnemonic : JB +// Supported forms : (2 forms) +// +// * JB rel8 +// * JB rel32 +// +func (self *Program) JB(v0 interface{}) *Instruction { + p := self.alloc("JB", 1, Operands { v0 }) + p.branch = _B_conditional + // JB rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x72) + m.imm1(relv(v[0])) + }) + } + // JB rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x82) + m.imm4(relv(v[0])) + }) + } + // JB label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x72) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x82) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JB") + } + return p +} + +// JBE performs "Jump if below or equal (CF == 1 or ZF == 1)". 
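+// JBE and JNA assemble to identical bytes (0x76 / 0F 86), just as JB, JC and JNAE all emit 0x72 / 0F 82 and JAE, JNB and JNC all emit 0x73 / 0F 83; the aliases exist as separate methods purely so call sites can use whichever name reads best.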
+// +// Mnemonic : JBE +// Supported forms : (2 forms) +// +// * JBE rel8 +// * JBE rel32 +// +func (self *Program) JBE(v0 interface{}) *Instruction { + p := self.alloc("JBE", 1, Operands { v0 }) + p.branch = _B_conditional + // JBE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x76) + m.imm1(relv(v[0])) + }) + } + // JBE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x86) + m.imm4(relv(v[0])) + }) + } + // JBE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x76) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x86) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JBE") + } + return p +} + +// JC performs "Jump if carry (CF == 1)". +// +// Mnemonic : JC +// Supported forms : (2 forms) +// +// * JC rel8 +// * JC rel32 +// +func (self *Program) JC(v0 interface{}) *Instruction { + p := self.alloc("JC", 1, Operands { v0 }) + p.branch = _B_conditional + // JC rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x72) + m.imm1(relv(v[0])) + }) + } + // JC rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x82) + m.imm4(relv(v[0])) + }) + } + // JC label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x72) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x82) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JC") + } + return p +} + +// JE performs "Jump if equal (ZF == 1)". +// +// Mnemonic : JE +// Supported forms : (2 forms) +// +// * JE rel8 +// * JE rel32 +// +func (self *Program) JE(v0 interface{}) *Instruction { + p := self.alloc("JE", 1, Operands { v0 }) + p.branch = _B_conditional + // JE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x74) + m.imm1(relv(v[0])) + }) + } + // JE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x84) + m.imm4(relv(v[0])) + }) + } + // JE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x74) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x84) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JE") + } + return p +} + +// JECXZ performs "Jump if ECX register is 0". +// +// Mnemonic : JECXZ +// Supported forms : (1 form) +// +// * JECXZ rel8 +// +func (self *Program) JECXZ(v0 interface{}) *Instruction { + p := self.alloc("JECXZ", 1, Operands { v0 }) + p.branch = _B_conditional + // JECXZ rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xe3) + m.imm1(relv(v[0])) + }) + } + // JECXZ label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0xe3) + m.imm1(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JECXZ") + } + return p +} + +// JG performs "Jump if greater (ZF == 0 and SF == OF)". 
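+// JG/JGE/JL/JLE below test the signed-comparison flags (SF, OF, ZF), while JA/JAE/JB/JBE above test the unsigned ones (CF, ZF); after p.CMPL(b, a), JG is "a > b" for signed integers and JA is the unsigned reading of the same compare.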
+// +// Mnemonic : JG +// Supported forms : (2 forms) +// +// * JG rel8 +// * JG rel32 +// +func (self *Program) JG(v0 interface{}) *Instruction { + p := self.alloc("JG", 1, Operands { v0 }) + p.branch = _B_conditional + // JG rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7f) + m.imm1(relv(v[0])) + }) + } + // JG rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8f) + m.imm4(relv(v[0])) + }) + } + // JG label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7f) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8f) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JG") + } + return p +} + +// JGE performs "Jump if greater or equal (SF == OF)". +// +// Mnemonic : JGE +// Supported forms : (2 forms) +// +// * JGE rel8 +// * JGE rel32 +// +func (self *Program) JGE(v0 interface{}) *Instruction { + p := self.alloc("JGE", 1, Operands { v0 }) + p.branch = _B_conditional + // JGE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7d) + m.imm1(relv(v[0])) + }) + } + // JGE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8d) + m.imm4(relv(v[0])) + }) + } + // JGE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7d) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8d) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JGE") + } + return p +} + +// JL performs "Jump if less (SF != OF)". +// +// Mnemonic : JL +// Supported forms : (2 forms) +// +// * JL rel8 +// * JL rel32 +// +func (self *Program) JL(v0 interface{}) *Instruction { + p := self.alloc("JL", 1, Operands { v0 }) + p.branch = _B_conditional + // JL rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7c) + m.imm1(relv(v[0])) + }) + } + // JL rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8c) + m.imm4(relv(v[0])) + }) + } + // JL label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7c) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8c) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JL") + } + return p +} + +// JLE performs "Jump if less or equal (ZF == 1 or SF != OF)". 
+// +// Mnemonic : JLE +// Supported forms : (2 forms) +// +// * JLE rel8 +// * JLE rel32 +// +func (self *Program) JLE(v0 interface{}) *Instruction { + p := self.alloc("JLE", 1, Operands { v0 }) + p.branch = _B_conditional + // JLE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7e) + m.imm1(relv(v[0])) + }) + } + // JLE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8e) + m.imm4(relv(v[0])) + }) + } + // JLE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7e) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8e) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JLE") + } + return p +} + +// JMP performs "Jump Unconditionally". +// +// Mnemonic : JMP +// Supported forms : (2 forms) +// +// * JMP rel8 +// * JMP rel32 +// +func (self *Program) JMP(v0 interface{}) *Instruction { + p := self.alloc("JMP", 1, Operands { v0 }) + p.branch = _B_unconditional + // JMP rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xeb) + m.imm1(relv(v[0])) + }) + } + // JMP rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xe9) + m.imm4(relv(v[0])) + }) + } + // JMP label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0xeb) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0xe9) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JMP") + } + return p +} + +// JMPQ performs "Jump Unconditionally". +// +// Mnemonic : JMP +// Supported forms : (2 forms) +// +// * JMPQ r64 +// * JMPQ m64 +// +func (self *Program) JMPQ(v0 interface{}) *Instruction { + p := self.alloc("JMPQ", 1, Operands { v0 }) + // JMPQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xff) + m.emit(0xe0 | lcode(v[0])) + }) + } + // JMPQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xff) + m.mrsd(4, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for JMPQ") + } + return p +} + +// JNA performs "Jump if not above (CF == 1 or ZF == 1)". +// +// Mnemonic : JNA +// Supported forms : (2 forms) +// +// * JNA rel8 +// * JNA rel32 +// +func (self *Program) JNA(v0 interface{}) *Instruction { + p := self.alloc("JNA", 1, Operands { v0 }) + p.branch = _B_conditional + // JNA rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x76) + m.imm1(relv(v[0])) + }) + } + // JNA rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x86) + m.imm4(relv(v[0])) + }) + } + // JNA label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x76) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x86) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNA") + } + return p +} + +// JNAE performs "Jump if not above or equal (CF == 1)". 
+// +// Mnemonic : JNAE +// Supported forms : (2 forms) +// +// * JNAE rel8 +// * JNAE rel32 +// +func (self *Program) JNAE(v0 interface{}) *Instruction { + p := self.alloc("JNAE", 1, Operands { v0 }) + p.branch = _B_conditional + // JNAE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x72) + m.imm1(relv(v[0])) + }) + } + // JNAE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x82) + m.imm4(relv(v[0])) + }) + } + // JNAE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x72) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x82) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNAE") + } + return p +} + +// JNB performs "Jump if not below (CF == 0)". +// +// Mnemonic : JNB +// Supported forms : (2 forms) +// +// * JNB rel8 +// * JNB rel32 +// +func (self *Program) JNB(v0 interface{}) *Instruction { + p := self.alloc("JNB", 1, Operands { v0 }) + p.branch = _B_conditional + // JNB rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x73) + m.imm1(relv(v[0])) + }) + } + // JNB rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x83) + m.imm4(relv(v[0])) + }) + } + // JNB label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x73) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x83) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNB") + } + return p +} + +// JNBE performs "Jump if not below or equal (CF == 0 and ZF == 0)". +// +// Mnemonic : JNBE +// Supported forms : (2 forms) +// +// * JNBE rel8 +// * JNBE rel32 +// +func (self *Program) JNBE(v0 interface{}) *Instruction { + p := self.alloc("JNBE", 1, Operands { v0 }) + p.branch = _B_conditional + // JNBE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x77) + m.imm1(relv(v[0])) + }) + } + // JNBE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x87) + m.imm4(relv(v[0])) + }) + } + // JNBE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x77) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x87) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNBE") + } + return p +} + +// JNC performs "Jump if not carry (CF == 0)". 
+// +// Mnemonic : JNC +// Supported forms : (2 forms) +// +// * JNC rel8 +// * JNC rel32 +// +func (self *Program) JNC(v0 interface{}) *Instruction { + p := self.alloc("JNC", 1, Operands { v0 }) + p.branch = _B_conditional + // JNC rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x73) + m.imm1(relv(v[0])) + }) + } + // JNC rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x83) + m.imm4(relv(v[0])) + }) + } + // JNC label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x73) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x83) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNC") + } + return p +} + +// JNE performs "Jump if not equal (ZF == 0)". +// +// Mnemonic : JNE +// Supported forms : (2 forms) +// +// * JNE rel8 +// * JNE rel32 +// +func (self *Program) JNE(v0 interface{}) *Instruction { + p := self.alloc("JNE", 1, Operands { v0 }) + p.branch = _B_conditional + // JNE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x75) + m.imm1(relv(v[0])) + }) + } + // JNE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x85) + m.imm4(relv(v[0])) + }) + } + // JNE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x75) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x85) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNE") + } + return p +} + +// JNG performs "Jump if not greater (ZF == 1 or SF != OF)". +// +// Mnemonic : JNG +// Supported forms : (2 forms) +// +// * JNG rel8 +// * JNG rel32 +// +func (self *Program) JNG(v0 interface{}) *Instruction { + p := self.alloc("JNG", 1, Operands { v0 }) + p.branch = _B_conditional + // JNG rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7e) + m.imm1(relv(v[0])) + }) + } + // JNG rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8e) + m.imm4(relv(v[0])) + }) + } + // JNG label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7e) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8e) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNG") + } + return p +} + +// JNGE performs "Jump if not greater or equal (SF != OF)". 
+// +// Mnemonic : JNGE +// Supported forms : (2 forms) +// +// * JNGE rel8 +// * JNGE rel32 +// +func (self *Program) JNGE(v0 interface{}) *Instruction { + p := self.alloc("JNGE", 1, Operands { v0 }) + p.branch = _B_conditional + // JNGE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7c) + m.imm1(relv(v[0])) + }) + } + // JNGE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8c) + m.imm4(relv(v[0])) + }) + } + // JNGE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7c) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8c) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNGE") + } + return p +} + +// JNL performs "Jump if not less (SF == OF)". +// +// Mnemonic : JNL +// Supported forms : (2 forms) +// +// * JNL rel8 +// * JNL rel32 +// +func (self *Program) JNL(v0 interface{}) *Instruction { + p := self.alloc("JNL", 1, Operands { v0 }) + p.branch = _B_conditional + // JNL rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7d) + m.imm1(relv(v[0])) + }) + } + // JNL rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8d) + m.imm4(relv(v[0])) + }) + } + // JNL label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7d) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8d) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNL") + } + return p +} + +// JNLE performs "Jump if not less or equal (ZF == 0 and SF == OF)". +// +// Mnemonic : JNLE +// Supported forms : (2 forms) +// +// * JNLE rel8 +// * JNLE rel32 +// +func (self *Program) JNLE(v0 interface{}) *Instruction { + p := self.alloc("JNLE", 1, Operands { v0 }) + p.branch = _B_conditional + // JNLE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7f) + m.imm1(relv(v[0])) + }) + } + // JNLE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8f) + m.imm4(relv(v[0])) + }) + } + // JNLE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7f) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8f) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNLE") + } + return p +} + +// JNO performs "Jump if not overflow (OF == 0)". 
+// +// Mnemonic : JNO +// Supported forms : (2 forms) +// +// * JNO rel8 +// * JNO rel32 +// +func (self *Program) JNO(v0 interface{}) *Instruction { + p := self.alloc("JNO", 1, Operands { v0 }) + p.branch = _B_conditional + // JNO rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x71) + m.imm1(relv(v[0])) + }) + } + // JNO rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x81) + m.imm4(relv(v[0])) + }) + } + // JNO label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x71) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x81) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNO") + } + return p +} + +// JNP performs "Jump if not parity (PF == 0)". +// +// Mnemonic : JNP +// Supported forms : (2 forms) +// +// * JNP rel8 +// * JNP rel32 +// +func (self *Program) JNP(v0 interface{}) *Instruction { + p := self.alloc("JNP", 1, Operands { v0 }) + p.branch = _B_conditional + // JNP rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7b) + m.imm1(relv(v[0])) + }) + } + // JNP rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8b) + m.imm4(relv(v[0])) + }) + } + // JNP label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7b) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8b) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNP") + } + return p +} + +// JNS performs "Jump if not sign (SF == 0)". +// +// Mnemonic : JNS +// Supported forms : (2 forms) +// +// * JNS rel8 +// * JNS rel32 +// +func (self *Program) JNS(v0 interface{}) *Instruction { + p := self.alloc("JNS", 1, Operands { v0 }) + p.branch = _B_conditional + // JNS rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x79) + m.imm1(relv(v[0])) + }) + } + // JNS rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x89) + m.imm4(relv(v[0])) + }) + } + // JNS label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x79) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x89) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNS") + } + return p +} + +// JNZ performs "Jump if not zero (ZF == 0)". 
+// +// Mnemonic : JNZ +// Supported forms : (2 forms) +// +// * JNZ rel8 +// * JNZ rel32 +// +func (self *Program) JNZ(v0 interface{}) *Instruction { + p := self.alloc("JNZ", 1, Operands { v0 }) + p.branch = _B_conditional + // JNZ rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x75) + m.imm1(relv(v[0])) + }) + } + // JNZ rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x85) + m.imm4(relv(v[0])) + }) + } + // JNZ label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x75) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x85) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNZ") + } + return p +} + +// JO performs "Jump if overflow (OF == 1)". +// +// Mnemonic : JO +// Supported forms : (2 forms) +// +// * JO rel8 +// * JO rel32 +// +func (self *Program) JO(v0 interface{}) *Instruction { + p := self.alloc("JO", 1, Operands { v0 }) + p.branch = _B_conditional + // JO rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x70) + m.imm1(relv(v[0])) + }) + } + // JO rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x80) + m.imm4(relv(v[0])) + }) + } + // JO label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x70) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x80) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JO") + } + return p +} + +// JP performs "Jump if parity (PF == 1)". +// +// Mnemonic : JP +// Supported forms : (2 forms) +// +// * JP rel8 +// * JP rel32 +// +func (self *Program) JP(v0 interface{}) *Instruction { + p := self.alloc("JP", 1, Operands { v0 }) + p.branch = _B_conditional + // JP rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7a) + m.imm1(relv(v[0])) + }) + } + // JP rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8a) + m.imm4(relv(v[0])) + }) + } + // JP label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7a) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8a) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JP") + } + return p +} + +// JPE performs "Jump if parity even (PF == 1)". 
+// +// Mnemonic : JPE +// Supported forms : (2 forms) +// +// * JPE rel8 +// * JPE rel32 +// +func (self *Program) JPE(v0 interface{}) *Instruction { + p := self.alloc("JPE", 1, Operands { v0 }) + p.branch = _B_conditional + // JPE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7a) + m.imm1(relv(v[0])) + }) + } + // JPE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8a) + m.imm4(relv(v[0])) + }) + } + // JPE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7a) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8a) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JPE") + } + return p +} + +// JPO performs "Jump if parity odd (PF == 0)". +// +// Mnemonic : JPO +// Supported forms : (2 forms) +// +// * JPO rel8 +// * JPO rel32 +// +func (self *Program) JPO(v0 interface{}) *Instruction { + p := self.alloc("JPO", 1, Operands { v0 }) + p.branch = _B_conditional + // JPO rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7b) + m.imm1(relv(v[0])) + }) + } + // JPO rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8b) + m.imm4(relv(v[0])) + }) + } + // JPO label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7b) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8b) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JPO") + } + return p +} + +// JRCXZ performs "Jump if RCX register is 0". +// +// Mnemonic : JRCXZ +// Supported forms : (1 form) +// +// * JRCXZ rel8 +// +func (self *Program) JRCXZ(v0 interface{}) *Instruction { + p := self.alloc("JRCXZ", 1, Operands { v0 }) + p.branch = _B_conditional + // JRCXZ rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xe3) + m.imm1(relv(v[0])) + }) + } + // JRCXZ label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0xe3) + m.imm1(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JRCXZ") + } + return p +} + +// JS performs "Jump if sign (SF == 1)". +// +// Mnemonic : JS +// Supported forms : (2 forms) +// +// * JS rel8 +// * JS rel32 +// +func (self *Program) JS(v0 interface{}) *Instruction { + p := self.alloc("JS", 1, Operands { v0 }) + p.branch = _B_conditional + // JS rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x78) + m.imm1(relv(v[0])) + }) + } + // JS rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x88) + m.imm4(relv(v[0])) + }) + } + // JS label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x78) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x88) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JS") + } + return p +} + +// JZ performs "Jump if zero (ZF == 1)". 
+// +// Mnemonic : JZ +// Supported forms : (2 forms) +// +// * JZ rel8 +// * JZ rel32 +// +func (self *Program) JZ(v0 interface{}) *Instruction { + p := self.alloc("JZ", 1, Operands { v0 }) + p.branch = _B_conditional + // JZ rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x74) + m.imm1(relv(v[0])) + }) + } + // JZ rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x84) + m.imm4(relv(v[0])) + }) + } + // JZ label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x74) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x84) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JZ") + } + return p +} + +// KADDB performs "ADD Two 8-bit Masks". +// +// Mnemonic : KADDB +// Supported forms : (1 form) +// +// * KADDB k, k, k [AVX512DQ] +// +func (self *Program) KADDB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KADDB", 3, Operands { v0, v1, v2 }) + // KADDB k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, nil, hlcode(v[1])) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KADDB") + } + return p +} + +// KADDD performs "ADD Two 32-bit Masks". +// +// Mnemonic : KADDD +// Supported forms : (1 form) +// +// * KADDD k, k, k [AVX512BW] +// +func (self *Program) KADDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KADDD", 3, Operands { v0, v1, v2 }) + // KADDD k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KADDD") + } + return p +} + +// KADDQ performs "ADD Two 64-bit Masks". +// +// Mnemonic : KADDQ +// Supported forms : (1 form) +// +// * KADDQ k, k, k [AVX512BW] +// +func (self *Program) KADDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KADDQ", 3, Operands { v0, v1, v2 }) + // KADDQ k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfc ^ (hlcode(v[1]) << 3)) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KADDQ") + } + return p +} + +// KADDW performs "ADD Two 16-bit Masks". +// +// Mnemonic : KADDW +// Supported forms : (1 form) +// +// * KADDW k, k, k [AVX512DQ] +// +func (self *Program) KADDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KADDW", 3, Operands { v0, v1, v2 }) + // KADDW k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, 0, nil, hlcode(v[1])) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KADDW") + } + return p +} + +// KANDB performs "Bitwise Logical AND 8-bit Masks". 
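+// Note the encoding split across the mask ops: the byte and word forms fit the two-byte VEX prefix built by m.vex2, while the dword and qword forms need VEX.W1, which only the three-byte prefix can express, hence the explicit C4 byte sequences in KADDD/KADDQ above and their D/Q siblings below.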
+// +// Mnemonic : KANDB +// Supported forms : (1 form) +// +// * KANDB k, k, k [AVX512DQ] +// +func (self *Program) KANDB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KANDB", 3, Operands { v0, v1, v2 }) + // KANDB k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, nil, hlcode(v[1])) + m.emit(0x41) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KANDB") + } + return p +} + +// KANDD performs "Bitwise Logical AND 32-bit Masks". +// +// Mnemonic : KANDD +// Supported forms : (1 form) +// +// * KANDD k, k, k [AVX512BW] +// +func (self *Program) KANDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KANDD", 3, Operands { v0, v1, v2 }) + // KANDD k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x41) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KANDD") + } + return p +} + +// KANDNB performs "Bitwise Logical AND NOT 8-bit Masks". +// +// Mnemonic : KANDNB +// Supported forms : (1 form) +// +// * KANDNB k, k, k [AVX512DQ] +// +func (self *Program) KANDNB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KANDNB", 3, Operands { v0, v1, v2 }) + // KANDNB k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, nil, hlcode(v[1])) + m.emit(0x42) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KANDNB") + } + return p +} + +// KANDND performs "Bitwise Logical AND NOT 32-bit Masks". +// +// Mnemonic : KANDND +// Supported forms : (1 form) +// +// * KANDND k, k, k [AVX512BW] +// +func (self *Program) KANDND(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KANDND", 3, Operands { v0, v1, v2 }) + // KANDND k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x42) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KANDND") + } + return p +} + +// KANDNQ performs "Bitwise Logical AND NOT 64-bit Masks". +// +// Mnemonic : KANDNQ +// Supported forms : (1 form) +// +// * KANDNQ k, k, k [AVX512BW] +// +func (self *Program) KANDNQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KANDNQ", 3, Operands { v0, v1, v2 }) + // KANDNQ k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfc ^ (hlcode(v[1]) << 3)) + m.emit(0x42) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KANDNQ") + } + return p +} + +// KANDNW performs "Bitwise Logical AND NOT 16-bit Masks". 
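+// Per the encodings above, the VEX.vvvv operand of KANDN is the second argument and is the complemented one: with this package's source-first order, KANDNW(a, b, k) computes k = NOT(b) AND a.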
+// +// Mnemonic : KANDNW +// Supported forms : (1 form) +// +// * KANDNW k, k, k [AVX512F] +// +func (self *Program) KANDNW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KANDNW", 3, Operands { v0, v1, v2 }) + // KANDNW k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, 0, nil, hlcode(v[1])) + m.emit(0x42) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KANDNW") + } + return p +} + +// KANDQ performs "Bitwise Logical AND 64-bit Masks". +// +// Mnemonic : KANDQ +// Supported forms : (1 form) +// +// * KANDQ k, k, k [AVX512BW] +// +func (self *Program) KANDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KANDQ", 3, Operands { v0, v1, v2 }) + // KANDQ k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfc ^ (hlcode(v[1]) << 3)) + m.emit(0x41) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KANDQ") + } + return p +} + +// KANDW performs "Bitwise Logical AND 16-bit Masks". +// +// Mnemonic : KANDW +// Supported forms : (1 form) +// +// * KANDW k, k, k [AVX512F] +// +func (self *Program) KANDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KANDW", 3, Operands { v0, v1, v2 }) + // KANDW k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, 0, nil, hlcode(v[1])) + m.emit(0x41) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KANDW") + } + return p +} + +// KMOVB performs "Move 8-bit Mask". 
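+// KMOV is the bridge between mask registers, general registers and memory; a round-trip sketch (the K-register names and the Ptr memory helper are assumptions): +// +//     p.KMOVW(ECX, K1)          // GPR -> mask +//     p.KNOTW(K1, K2)           // see KNOTW below +//     p.KMOVW(K2, Ptr(RDI, 0))  // mask -> m16 +//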
+// +// Mnemonic : KMOVB +// Supported forms : (5 forms) +// +// * KMOVB k, k [AVX512DQ] +// * KMOVB r32, k [AVX512DQ] +// * KMOVB m8, k [AVX512DQ] +// * KMOVB k, r32 [AVX512DQ] +// * KMOVB k, m8 [AVX512DQ] +// +func (self *Program) KMOVB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KMOVB", 2, Operands { v0, v1 }) + // KMOVB k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, nil, 0) + m.emit(0x90) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVB r32, k + if isReg32(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[0], 0) + m.emit(0x92) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVB m8, k + if isM8(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, addr(v[0]), 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // KMOVB k, r32 + if isK(v0) && isReg32(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), nil, 0) + m.emit(0x93) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVB k, m8 + if isK(v0) && isM8(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, addr(v[1]), 0) + m.emit(0x91) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for KMOVB") + } + return p +} + +// KMOVD performs "Move 32-bit Mask". +// +// Mnemonic : KMOVD +// Supported forms : (5 forms) +// +// * KMOVD k, k [AVX512BW] +// * KMOVD r32, k [AVX512BW] +// * KMOVD m32, k [AVX512BW] +// * KMOVD k, r32 [AVX512BW] +// * KMOVD k, m32 [AVX512BW] +// +func (self *Program) KMOVD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KMOVD", 2, Operands { v0, v1 }) + // KMOVD k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xf9) + m.emit(0x90) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVD r32, k + if isReg32(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, 0, v[0], 0) + m.emit(0x92) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVD m32, k + if isM32(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x81, 0, addr(v[0]), 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // KMOVD k, r32 + if isK(v0) && isReg32(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), nil, 0) + m.emit(0x93) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVD k, m32 + if isK(v0) && isM32(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x81, 0, addr(v[1]), 0) + m.emit(0x91) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for KMOVD") + } + return p +} + +// KMOVQ performs "Move 64-bit Mask". 
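+// The r64 forms of KMOVQ need VEX.W1 plus the operand's REX-extension bit, so they are emitted as raw C4 sequences with hcode XORed into the prefix bytes; the memory forms route through m.vex3, whose third argument appears to carry the W and pp bits (0x80 here for W1 with no 66 prefix).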
+// +// Mnemonic : KMOVQ +// Supported forms : (5 forms) +// +// * KMOVQ k, k [AVX512BW] +// * KMOVQ r64, k [AVX512BW] +// * KMOVQ m64, k [AVX512BW] +// * KMOVQ k, r64 [AVX512BW] +// * KMOVQ k, m64 [AVX512BW] +// +func (self *Program) KMOVQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KMOVQ", 2, Operands { v0, v1 }) + // KMOVQ k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xf8) + m.emit(0x90) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVQ r64, k + if isReg64(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[0]) << 5)) + m.emit(0xfb) + m.emit(0x92) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVQ m64, k + if isM64(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x80, 0, addr(v[0]), 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // KMOVQ k, r64 + if isK(v0) && isReg64(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[1]) << 7)) + m.emit(0xfb) + m.emit(0x93) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVQ k, m64 + if isK(v0) && isM64(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x80, 0, addr(v[1]), 0) + m.emit(0x91) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for KMOVQ") + } + return p +} + +// KMOVW performs "Move 16-bit Mask". +// +// Mnemonic : KMOVW +// Supported forms : (5 forms) +// +// * KMOVW k, k [AVX512F] +// * KMOVW r32, k [AVX512F] +// * KMOVW m16, k [AVX512F] +// * KMOVW k, r32 [AVX512F] +// * KMOVW k, m16 [AVX512F] +// +func (self *Program) KMOVW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KMOVW", 2, Operands { v0, v1 }) + // KMOVW k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, nil, 0) + m.emit(0x90) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVW r32, k + if isReg32(v0) && isK(v1) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, v[0], 0) + m.emit(0x92) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVW m16, k + if isM16(v0) && isK(v1) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, addr(v[0]), 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // KMOVW k, r32 + if isK(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), nil, 0) + m.emit(0x93) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVW k, m16 + if isK(v0) && isM16(v1) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, addr(v[1]), 0) + m.emit(0x91) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for KMOVW") + } + return p +} + +// KNOTB performs "NOT 8-bit Mask Register". 
+// +// Mnemonic : KNOTB +// Supported forms : (1 form) +// +// * KNOTB k, k [AVX512DQ] +// +func (self *Program) KNOTB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KNOTB", 2, Operands { v0, v1 }) + // KNOTB k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, nil, 0) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KNOTB") + } + return p +} + +// KNOTD performs "NOT 32-bit Mask Register". +// +// Mnemonic : KNOTD +// Supported forms : (1 form) +// +// * KNOTD k, k [AVX512BW] +// +func (self *Program) KNOTD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KNOTD", 2, Operands { v0, v1 }) + // KNOTD k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xf9) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KNOTD") + } + return p +} + +// KNOTQ performs "NOT 64-bit Mask Register". +// +// Mnemonic : KNOTQ +// Supported forms : (1 form) +// +// * KNOTQ k, k [AVX512BW] +// +func (self *Program) KNOTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KNOTQ", 2, Operands { v0, v1 }) + // KNOTQ k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xf8) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KNOTQ") + } + return p +} + +// KNOTW performs "NOT 16-bit Mask Register". +// +// Mnemonic : KNOTW +// Supported forms : (1 form) +// +// * KNOTW k, k [AVX512F] +// +func (self *Program) KNOTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KNOTW", 2, Operands { v0, v1 }) + // KNOTW k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, nil, 0) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KNOTW") + } + return p +} + +// KORB performs "Bitwise Logical OR 8-bit Masks". +// +// Mnemonic : KORB +// Supported forms : (1 form) +// +// * KORB k, k, k [AVX512DQ] +// +func (self *Program) KORB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KORB", 3, Operands { v0, v1, v2 }) + // KORB k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, nil, hlcode(v[1])) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KORB") + } + return p +} + +// KORD performs "Bitwise Logical OR 32-bit Masks". 
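+//
+// Sketch (assumptions as above; the destination mask is the last operand):
+//
+//     p.KORD(K1, K2, K3)    // k3 = k2 OR k1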
+// +// Mnemonic : KORD +// Supported forms : (1 form) +// +// * KORD k, k, k [AVX512BW] +// +func (self *Program) KORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KORD", 3, Operands { v0, v1, v2 }) + // KORD k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KORD") + } + return p +} + +// KORQ performs "Bitwise Logical OR 64-bit Masks". +// +// Mnemonic : KORQ +// Supported forms : (1 form) +// +// * KORQ k, k, k [AVX512BW] +// +func (self *Program) KORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KORQ", 3, Operands { v0, v1, v2 }) + // KORQ k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfc ^ (hlcode(v[1]) << 3)) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KORQ") + } + return p +} + +// KORTESTB performs "OR 8-bit Masks and Set Flags". +// +// Mnemonic : KORTESTB +// Supported forms : (1 form) +// +// * KORTESTB k, k [AVX512DQ] +// +func (self *Program) KORTESTB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KORTESTB", 2, Operands { v0, v1 }) + // KORTESTB k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, nil, 0) + m.emit(0x98) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KORTESTB") + } + return p +} + +// KORTESTD performs "OR 32-bit Masks and Set Flags". +// +// Mnemonic : KORTESTD +// Supported forms : (1 form) +// +// * KORTESTD k, k [AVX512BW] +// +func (self *Program) KORTESTD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KORTESTD", 2, Operands { v0, v1 }) + // KORTESTD k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xf9) + m.emit(0x98) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KORTESTD") + } + return p +} + +// KORTESTQ performs "OR 64-bit Masks and Set Flags". +// +// Mnemonic : KORTESTQ +// Supported forms : (1 form) +// +// * KORTESTQ k, k [AVX512BW] +// +func (self *Program) KORTESTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KORTESTQ", 2, Operands { v0, v1 }) + // KORTESTQ k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xf8) + m.emit(0x98) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KORTESTQ") + } + return p +} + +// KORTESTW performs "OR 16-bit Masks and Set Flags". 
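+//
+// Sketch (assumptions as above):
+//
+//     p.KORTESTW(K1, K2)    // set ZF/CF from k2 OR k1; no mask is written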
+// +// Mnemonic : KORTESTW +// Supported forms : (1 form) +// +// * KORTESTW k, k [AVX512F] +// +func (self *Program) KORTESTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KORTESTW", 2, Operands { v0, v1 }) + // KORTESTW k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, nil, 0) + m.emit(0x98) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KORTESTW") + } + return p +} + +// KORW performs "Bitwise Logical OR 16-bit Masks". +// +// Mnemonic : KORW +// Supported forms : (1 form) +// +// * KORW k, k, k [AVX512F] +// +func (self *Program) KORW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KORW", 3, Operands { v0, v1, v2 }) + // KORW k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, 0, nil, hlcode(v[1])) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KORW") + } + return p +} + +// KSHIFTLB performs "Shift Left 8-bit Masks". +// +// Mnemonic : KSHIFTLB +// Supported forms : (1 form) +// +// * KSHIFTLB imm8, k, k [AVX512DQ] +// +func (self *Program) KSHIFTLB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KSHIFTLB", 3, Operands { v0, v1, v2 }) + // KSHIFTLB imm8, k, k + if isImm8(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3) + m.emit(0x79) + m.emit(0x32) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KSHIFTLB") + } + return p +} + +// KSHIFTLD performs "Shift Left 32-bit Masks". +// +// Mnemonic : KSHIFTLD +// Supported forms : (1 form) +// +// * KSHIFTLD imm8, k, k [AVX512BW] +// +func (self *Program) KSHIFTLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KSHIFTLD", 3, Operands { v0, v1, v2 }) + // KSHIFTLD imm8, k, k + if isImm8(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3) + m.emit(0x79) + m.emit(0x33) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KSHIFTLD") + } + return p +} + +// KSHIFTLQ performs "Shift Left 64-bit Masks". +// +// Mnemonic : KSHIFTLQ +// Supported forms : (1 form) +// +// * KSHIFTLQ imm8, k, k [AVX512BW] +// +func (self *Program) KSHIFTLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KSHIFTLQ", 3, Operands { v0, v1, v2 }) + // KSHIFTLQ imm8, k, k + if isImm8(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3) + m.emit(0xf9) + m.emit(0x33) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KSHIFTLQ") + } + return p +} + +// KSHIFTLW performs "Shift Left 16-bit Masks". 
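+//
+// Sketch (assumptions as above; a plain Go integer is assumed to be accepted
+// for the imm8 count):
+//
+//     p.KSHIFTLW(4, K1, K2)    // k2 = k1 << 4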
+// +// Mnemonic : KSHIFTLW +// Supported forms : (1 form) +// +// * KSHIFTLW imm8, k, k [AVX512F] +// +func (self *Program) KSHIFTLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KSHIFTLW", 3, Operands { v0, v1, v2 }) + // KSHIFTLW imm8, k, k + if isImm8(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3) + m.emit(0xf9) + m.emit(0x32) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KSHIFTLW") + } + return p +} + +// KSHIFTRB performs "Shift Right 8-bit Masks". +// +// Mnemonic : KSHIFTRB +// Supported forms : (1 form) +// +// * KSHIFTRB imm8, k, k [AVX512DQ] +// +func (self *Program) KSHIFTRB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KSHIFTRB", 3, Operands { v0, v1, v2 }) + // KSHIFTRB imm8, k, k + if isImm8(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3) + m.emit(0x79) + m.emit(0x30) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KSHIFTRB") + } + return p +} + +// KSHIFTRD performs "Shift Right 32-bit Masks". +// +// Mnemonic : KSHIFTRD +// Supported forms : (1 form) +// +// * KSHIFTRD imm8, k, k [AVX512BW] +// +func (self *Program) KSHIFTRD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KSHIFTRD", 3, Operands { v0, v1, v2 }) + // KSHIFTRD imm8, k, k + if isImm8(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3) + m.emit(0x79) + m.emit(0x31) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KSHIFTRD") + } + return p +} + +// KSHIFTRQ performs "Shift Right 64-bit Masks". +// +// Mnemonic : KSHIFTRQ +// Supported forms : (1 form) +// +// * KSHIFTRQ imm8, k, k [AVX512BW] +// +func (self *Program) KSHIFTRQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KSHIFTRQ", 3, Operands { v0, v1, v2 }) + // KSHIFTRQ imm8, k, k + if isImm8(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3) + m.emit(0xf9) + m.emit(0x31) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KSHIFTRQ") + } + return p +} + +// KSHIFTRW performs "Shift Right 16-bit Masks". +// +// Mnemonic : KSHIFTRW +// Supported forms : (1 form) +// +// * KSHIFTRW imm8, k, k [AVX512F] +// +func (self *Program) KSHIFTRW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KSHIFTRW", 3, Operands { v0, v1, v2 }) + // KSHIFTRW imm8, k, k + if isImm8(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3) + m.emit(0xf9) + m.emit(0x30) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KSHIFTRW") + } + return p +} + +// KTESTB performs "Bit Test 8-bit Masks and Set Flags". 
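+//
+// Sketch (assumptions as above):
+//
+//     p.KTESTB(K1, K2)    // AND-test k2 against k1; only ZF/CF are updated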
+// +// Mnemonic : KTESTB +// Supported forms : (1 form) +// +// * KTESTB k, k [AVX512DQ] +// +func (self *Program) KTESTB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KTESTB", 2, Operands { v0, v1 }) + // KTESTB k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, nil, 0) + m.emit(0x99) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KTESTB") + } + return p +} + +// KTESTD performs "Bit Test 32-bit Masks and Set Flags". +// +// Mnemonic : KTESTD +// Supported forms : (1 form) +// +// * KTESTD k, k [AVX512BW] +// +func (self *Program) KTESTD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KTESTD", 2, Operands { v0, v1 }) + // KTESTD k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xf9) + m.emit(0x99) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KTESTD") + } + return p +} + +// KTESTQ performs "Bit Test 64-bit Masks and Set Flags". +// +// Mnemonic : KTESTQ +// Supported forms : (1 form) +// +// * KTESTQ k, k [AVX512BW] +// +func (self *Program) KTESTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KTESTQ", 2, Operands { v0, v1 }) + // KTESTQ k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xf8) + m.emit(0x99) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KTESTQ") + } + return p +} + +// KTESTW performs "Bit Test 16-bit Masks and Set Flags". +// +// Mnemonic : KTESTW +// Supported forms : (1 form) +// +// * KTESTW k, k [AVX512DQ] +// +func (self *Program) KTESTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KTESTW", 2, Operands { v0, v1 }) + // KTESTW k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, nil, 0) + m.emit(0x99) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KTESTW") + } + return p +} + +// KUNPCKBW performs "Unpack and Interleave 8-bit Masks". +// +// Mnemonic : KUNPCKBW +// Supported forms : (1 form) +// +// * KUNPCKBW k, k, k [AVX512F] +// +func (self *Program) KUNPCKBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KUNPCKBW", 3, Operands { v0, v1, v2 }) + // KUNPCKBW k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, nil, hlcode(v[1])) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KUNPCKBW") + } + return p +} + +// KUNPCKDQ performs "Unpack and Interleave 32-bit Masks". 
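+//
+// Sketch (assumptions as above; operand order follows the form below, with
+// the destination last):
+//
+//     p.KUNPCKDQ(K1, K2, K3)    // k3 = low 32 bits of k2 and k1 concatenated
+//                               // (k1 in the low half)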
+// +// Mnemonic : KUNPCKDQ +// Supported forms : (1 form) +// +// * KUNPCKDQ k, k, k [AVX512BW] +// +func (self *Program) KUNPCKDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KUNPCKDQ", 3, Operands { v0, v1, v2 }) + // KUNPCKDQ k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfc ^ (hlcode(v[1]) << 3)) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KUNPCKDQ") + } + return p +} + +// KUNPCKWD performs "Unpack and Interleave 16-bit Masks". +// +// Mnemonic : KUNPCKWD +// Supported forms : (1 form) +// +// * KUNPCKWD k, k, k [AVX512BW] +// +func (self *Program) KUNPCKWD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KUNPCKWD", 3, Operands { v0, v1, v2 }) + // KUNPCKWD k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, 0, nil, hlcode(v[1])) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KUNPCKWD") + } + return p +} + +// KXNORB performs "Bitwise Logical XNOR 8-bit Masks". +// +// Mnemonic : KXNORB +// Supported forms : (1 form) +// +// * KXNORB k, k, k [AVX512DQ] +// +func (self *Program) KXNORB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KXNORB", 3, Operands { v0, v1, v2 }) + // KXNORB k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, nil, hlcode(v[1])) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KXNORB") + } + return p +} + +// KXNORD performs "Bitwise Logical XNOR 32-bit Masks". +// +// Mnemonic : KXNORD +// Supported forms : (1 form) +// +// * KXNORD k, k, k [AVX512BW] +// +func (self *Program) KXNORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KXNORD", 3, Operands { v0, v1, v2 }) + // KXNORD k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KXNORD") + } + return p +} + +// KXNORQ performs "Bitwise Logical XNOR 64-bit Masks". +// +// Mnemonic : KXNORQ +// Supported forms : (1 form) +// +// * KXNORQ k, k, k [AVX512BW] +// +func (self *Program) KXNORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KXNORQ", 3, Operands { v0, v1, v2 }) + // KXNORQ k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfc ^ (hlcode(v[1]) << 3)) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KXNORQ") + } + return p +} + +// KXNORW performs "Bitwise Logical XNOR 16-bit Masks". 
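+//
+// Sketch (assumptions as above). XNOR of a register with itself yields all
+// ones, a common way to materialize a full mask:
+//
+//     p.KXNORW(K0, K0, K1)    // k1 = 0xffff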
+// +// Mnemonic : KXNORW +// Supported forms : (1 form) +// +// * KXNORW k, k, k [AVX512F] +// +func (self *Program) KXNORW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KXNORW", 3, Operands { v0, v1, v2 }) + // KXNORW k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, 0, nil, hlcode(v[1])) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KXNORW") + } + return p +} + +// KXORB performs "Bitwise Logical XOR 8-bit Masks". +// +// Mnemonic : KXORB +// Supported forms : (1 form) +// +// * KXORB k, k, k [AVX512DQ] +// +func (self *Program) KXORB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KXORB", 3, Operands { v0, v1, v2 }) + // KXORB k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, nil, hlcode(v[1])) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KXORB") + } + return p +} + +// KXORD performs "Bitwise Logical XOR 32-bit Masks". +// +// Mnemonic : KXORD +// Supported forms : (1 form) +// +// * KXORD k, k, k [AVX512BW] +// +func (self *Program) KXORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KXORD", 3, Operands { v0, v1, v2 }) + // KXORD k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KXORD") + } + return p +} + +// KXORQ performs "Bitwise Logical XOR 64-bit Masks". +// +// Mnemonic : KXORQ +// Supported forms : (1 form) +// +// * KXORQ k, k, k [AVX512BW] +// +func (self *Program) KXORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KXORQ", 3, Operands { v0, v1, v2 }) + // KXORQ k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfc ^ (hlcode(v[1]) << 3)) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KXORQ") + } + return p +} + +// KXORW performs "Bitwise Logical XOR 16-bit Masks". +// +// Mnemonic : KXORW +// Supported forms : (1 form) +// +// * KXORW k, k, k [AVX512F] +// +func (self *Program) KXORW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KXORW", 3, Operands { v0, v1, v2 }) + // KXORW k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, 0, nil, hlcode(v[1])) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KXORW") + } + return p +} + +// LDDQU performs "Load Unaligned Integer 128 Bits". 
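+//
+// Sketch (assumptions as above; Ptr is assumed here to be the package's
+// base-plus-displacement memory-operand helper):
+//
+//     p.LDDQU(Ptr(RSI, 0), XMM0)    // unaligned 128-bit load from [rsi]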
+// +// Mnemonic : LDDQU +// Supported forms : (1 form) +// +// * LDDQU m128, xmm [SSE3] +// +func (self *Program) LDDQU(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("LDDQU", 2, Operands { v0, v1 }) + // LDDQU m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for LDDQU") + } + return p +} + +// LDMXCSR performs "Load MXCSR Register". +// +// Mnemonic : LDMXCSR +// Supported forms : (1 form) +// +// * LDMXCSR m32 [SSE] +// +func (self *Program) LDMXCSR(v0 interface{}) *Instruction { + p := self.alloc("LDMXCSR", 1, Operands { v0 }) + // LDMXCSR m32 + if isM32(v0) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0xae) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for LDMXCSR") + } + return p +} + +// LEAL performs "Load Effective Address". +// +// Mnemonic : LEA +// Supported forms : (1 form) +// +// * LEAL m, r32 +// +func (self *Program) LEAL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("LEAL", 2, Operands { v0, v1 }) + // LEAL m, r32 + if isM(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x8d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for LEAL") + } + return p +} + +// LEAQ performs "Load Effective Address". +// +// Mnemonic : LEA +// Supported forms : (1 form) +// +// * LEAQ m, r64 +// +func (self *Program) LEAQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("LEAQ", 2, Operands { v0, v1 }) + // LEAQ m, r64 + if isM(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x8d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for LEAQ") + } + return p +} + +// LEAW performs "Load Effective Address". +// +// Mnemonic : LEA +// Supported forms : (1 form) +// +// * LEAW m, r16 +// +func (self *Program) LEAW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("LEAW", 2, Operands { v0, v1 }) + // LEAW m, r16 + if isM(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x8d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for LEAW") + } + return p +} + +// LFENCE performs "Load Fence". +// +// Mnemonic : LFENCE +// Supported forms : (1 form) +// +// * LFENCE [SSE2] +// +func (self *Program) LFENCE() *Instruction { + p := self.alloc("LFENCE", 0, Operands { }) + // LFENCE + self.require(ISA_SSE2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0xae) + m.emit(0xe8) + }) + return p +} + +// LZCNTL performs "Count the Number of Leading Zero Bits". 
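+//
+// Sketch (assumptions as above):
+//
+//     p.LZCNTL(EAX, ECX)    // ecx = count of leading zero bits in eax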
+// +// Mnemonic : LZCNT +// Supported forms : (2 forms) +// +// * LZCNTL r32, r32 [LZCNT] +// * LZCNTL m32, r32 [LZCNT] +// +func (self *Program) LZCNTL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("LZCNTL", 2, Operands { v0, v1 }) + // LZCNTL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_LZCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // LZCNTL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_LZCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for LZCNTL") + } + return p +} + +// LZCNTQ performs "Count the Number of Leading Zero Bits". +// +// Mnemonic : LZCNT +// Supported forms : (2 forms) +// +// * LZCNTQ r64, r64 [LZCNT] +// * LZCNTQ m64, r64 [LZCNT] +// +func (self *Program) LZCNTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("LZCNTQ", 2, Operands { v0, v1 }) + // LZCNTQ r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_LZCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // LZCNTQ m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_LZCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xbd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for LZCNTQ") + } + return p +} + +// LZCNTW performs "Count the Number of Leading Zero Bits". +// +// Mnemonic : LZCNT +// Supported forms : (2 forms) +// +// * LZCNTW r16, r16 [LZCNT] +// * LZCNTW m16, r16 [LZCNT] +// +func (self *Program) LZCNTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("LZCNTW", 2, Operands { v0, v1 }) + // LZCNTW r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_LZCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // LZCNTW m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_LZCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for LZCNTW") + } + return p +} + +// MASKMOVDQU performs "Store Selected Bytes of Double Quadword". 
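+//
+// Sketch (assumptions as above; note the implicit destination):
+//
+//     p.MASKMOVDQU(XMM1, XMM0)    // store bytes of xmm0 selected by the
+//                                 // sign bits of xmm1 to [rdi]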
+// +// Mnemonic : MASKMOVDQU +// Supported forms : (1 form) +// +// * MASKMOVDQU xmm, xmm [SSE2] +// +func (self *Program) MASKMOVDQU(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MASKMOVDQU", 2, Operands { v0, v1 }) + // MASKMOVDQU xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MASKMOVDQU") + } + return p +} + +// MASKMOVQ performs "Store Selected Bytes of Quadword". +// +// Mnemonic : MASKMOVQ +// Supported forms : (1 form) +// +// * MASKMOVQ mm, mm [MMX+] +// +func (self *Program) MASKMOVQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MASKMOVQ", 2, Operands { v0, v1 }) + // MASKMOVQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MASKMOVQ") + } + return p +} + +// MAXPD performs "Return Maximum Packed Double-Precision Floating-Point Values". +// +// Mnemonic : MAXPD +// Supported forms : (2 forms) +// +// * MAXPD xmm, xmm [SSE2] +// * MAXPD m128, xmm [SSE2] +// +func (self *Program) MAXPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MAXPD", 2, Operands { v0, v1 }) + // MAXPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MAXPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MAXPD") + } + return p +} + +// MAXPS performs "Return Maximum Packed Single-Precision Floating-Point Values". +// +// Mnemonic : MAXPS +// Supported forms : (2 forms) +// +// * MAXPS xmm, xmm [SSE] +// * MAXPS m128, xmm [SSE] +// +func (self *Program) MAXPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MAXPS", 2, Operands { v0, v1 }) + // MAXPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MAXPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MAXPS") + } + return p +} + +// MAXSD performs "Return Maximum Scalar Double-Precision Floating-Point Value". 
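+//
+// Sketch (assumptions as above):
+//
+//     p.MAXSD(XMM1, XMM0)    // xmm0 = max(xmm0, xmm1), scalar double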
+// +// Mnemonic : MAXSD +// Supported forms : (2 forms) +// +// * MAXSD xmm, xmm [SSE2] +// * MAXSD m64, xmm [SSE2] +// +func (self *Program) MAXSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MAXSD", 2, Operands { v0, v1 }) + // MAXSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MAXSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MAXSD") + } + return p +} + +// MAXSS performs "Return Maximum Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : MAXSS +// Supported forms : (2 forms) +// +// * MAXSS xmm, xmm [SSE] +// * MAXSS m32, xmm [SSE] +// +func (self *Program) MAXSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MAXSS", 2, Operands { v0, v1 }) + // MAXSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MAXSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MAXSS") + } + return p +} + +// MFENCE performs "Memory Fence". +// +// Mnemonic : MFENCE +// Supported forms : (1 form) +// +// * MFENCE [SSE2] +// +func (self *Program) MFENCE() *Instruction { + p := self.alloc("MFENCE", 0, Operands { }) + // MFENCE + self.require(ISA_SSE2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0xae) + m.emit(0xf0) + }) + return p +} + +// MINPD performs "Return Minimum Packed Double-Precision Floating-Point Values". +// +// Mnemonic : MINPD +// Supported forms : (2 forms) +// +// * MINPD xmm, xmm [SSE2] +// * MINPD m128, xmm [SSE2] +// +func (self *Program) MINPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MINPD", 2, Operands { v0, v1 }) + // MINPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MINPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MINPD") + } + return p +} + +// MINPS performs "Return Minimum Packed Single-Precision Floating-Point Values". 
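+//
+// Sketch (assumptions as above):
+//
+//     p.MINPS(XMM1, XMM0)    // xmm0 = lane-wise min(xmm0, xmm1), 4 x float32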
+// +// Mnemonic : MINPS +// Supported forms : (2 forms) +// +// * MINPS xmm, xmm [SSE] +// * MINPS m128, xmm [SSE] +// +func (self *Program) MINPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MINPS", 2, Operands { v0, v1 }) + // MINPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MINPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MINPS") + } + return p +} + +// MINSD performs "Return Minimum Scalar Double-Precision Floating-Point Value". +// +// Mnemonic : MINSD +// Supported forms : (2 forms) +// +// * MINSD xmm, xmm [SSE2] +// * MINSD m64, xmm [SSE2] +// +func (self *Program) MINSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MINSD", 2, Operands { v0, v1 }) + // MINSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MINSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MINSD") + } + return p +} + +// MINSS performs "Return Minimum Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : MINSS +// Supported forms : (2 forms) +// +// * MINSS xmm, xmm [SSE] +// * MINSS m32, xmm [SSE] +// +func (self *Program) MINSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MINSS", 2, Operands { v0, v1 }) + // MINSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MINSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MINSS") + } + return p +} + +// MONITOR performs "Monitor a Linear Address Range". +// +// Mnemonic : MONITOR +// Supported forms : (1 form) +// +// * MONITOR [MONITOR] +// +func (self *Program) MONITOR() *Instruction { + p := self.alloc("MONITOR", 0, Operands { }) + // MONITOR + self.require(ISA_MONITOR) + p.domain = DomainMisc + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x01) + m.emit(0xc8) + }) + return p +} + +// MONITORX performs "Monitor a Linear Address Range with Timeout". 
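+//
+// Sketch (assumptions as above; the address and hints are implicit in
+// RAX/ECX/EDX):
+//
+//     p.MONITORX()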
+// +// Mnemonic : MONITORX +// Supported forms : (1 form) +// +// * MONITORX [MONITORX] +// +func (self *Program) MONITORX() *Instruction { + p := self.alloc("MONITORX", 0, Operands { }) + // MONITORX + self.require(ISA_MONITORX) + p.domain = DomainMisc + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x01) + m.emit(0xfa) + }) + return p +} + +// MOVAPD performs "Move Aligned Packed Double-Precision Floating-Point Values". +// +// Mnemonic : MOVAPD +// Supported forms : (3 forms) +// +// * MOVAPD xmm, xmm [SSE2] +// * MOVAPD m128, xmm [SSE2] +// * MOVAPD xmm, m128 [SSE2] +// +func (self *Program) MOVAPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVAPD", 2, Operands { v0, v1 }) + // MOVAPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVAPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVAPD xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVAPD") + } + return p +} + +// MOVAPS performs "Move Aligned Packed Single-Precision Floating-Point Values". +// +// Mnemonic : MOVAPS +// Supported forms : (3 forms) +// +// * MOVAPS xmm, xmm [SSE] +// * MOVAPS m128, xmm [SSE] +// * MOVAPS xmm, m128 [SSE] +// +func (self *Program) MOVAPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVAPS", 2, Operands { v0, v1 }) + // MOVAPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVAPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVAPS xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVAPS") + } + return p +} + +// MOVB performs "Move". 
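+//
+// Sketch (assumptions as above; literal Go integers are assumed to be
+// accepted for the imm8 form):
+//
+//     p.MOVB(1, AL)     // al = 1
+//     p.MOVB(AL, BL)    // bl = al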
+// +// Mnemonic : MOV +// Supported forms : (5 forms) +// +// * MOVB imm8, r8 +// * MOVB r8, r8 +// * MOVB m8, r8 +// * MOVB imm8, m8 +// * MOVB r8, m8 +// +func (self *Program) MOVB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVB", 2, Operands { v0, v1 }) + // MOVB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xb0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // MOVB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x88) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x8a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x8a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc6) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // MOVB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x88) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVB") + } + return p +} + +// MOVBEL performs "Move Data After Swapping Bytes". +// +// Mnemonic : MOVBE +// Supported forms : (2 forms) +// +// * MOVBEL m32, r32 [MOVBE] +// * MOVBEL r32, m32 [MOVBE] +// +func (self *Program) MOVBEL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVBEL", 2, Operands { v0, v1 }) + // MOVBEL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_MOVBE) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVBEL r32, m32 + if isReg32(v0) && isM32(v1) { + self.require(ISA_MOVBE) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVBEL") + } + return p +} + +// MOVBEQ performs "Move Data After Swapping Bytes". 
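+//
+// Sketch (assumptions as above, again with the assumed Ptr memory helper):
+//
+//     p.MOVBEQ(Ptr(RSI, 0), RAX)    // rax = byte-swapped 64-bit load from [rsi]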
+// +// Mnemonic : MOVBE +// Supported forms : (2 forms) +// +// * MOVBEQ m64, r64 [MOVBE] +// * MOVBEQ r64, m64 [MOVBE] +// +func (self *Program) MOVBEQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVBEQ", 2, Operands { v0, v1 }) + // MOVBEQ m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_MOVBE) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVBEQ r64, m64 + if isReg64(v0) && isM64(v1) { + self.require(ISA_MOVBE) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVBEQ") + } + return p +} + +// MOVBEW performs "Move Data After Swapping Bytes". +// +// Mnemonic : MOVBE +// Supported forms : (2 forms) +// +// * MOVBEW m16, r16 [MOVBE] +// * MOVBEW r16, m16 [MOVBE] +// +func (self *Program) MOVBEW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVBEW", 2, Operands { v0, v1 }) + // MOVBEW m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_MOVBE) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVBEW r16, m16 + if isReg16(v0) && isM16(v1) { + self.require(ISA_MOVBE) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVBEW") + } + return p +} + +// MOVD performs "Move Doubleword". 
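+//
+// Sketch (assumptions as above):
+//
+//     p.MOVD(EAX, XMM0)    // xmm0[31:0] = eax, upper bits zeroed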
+// +// Mnemonic : MOVD +// Supported forms : (8 forms) +// +// * MOVD mm, r32 [MMX] +// * MOVD r32, mm [MMX] +// * MOVD m32, mm [MMX] +// * MOVD mm, m32 [MMX] +// * MOVD xmm, r32 [SSE2] +// * MOVD r32, xmm [SSE2] +// * MOVD m32, xmm [SSE2] +// * MOVD xmm, m32 [SSE2] +// +func (self *Program) MOVD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVD", 2, Operands { v0, v1 }) + // MOVD mm, r32 + if isMM(v0) && isReg32(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVD r32, mm + if isReg32(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVD m32, mm + if isM32(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVD mm, m32 + if isMM(v0) && isM32(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x7e) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // MOVD xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVD r32, xmm + if isReg32(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVD xmm, m32 + if isXMM(v0) && isM32(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x7e) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVD") + } + return p +} + +// MOVDDUP performs "Move One Double-FP and Duplicate". 
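+//
+// Sketch (assumptions as above):
+//
+//     p.MOVDDUP(XMM1, XMM0)    // broadcast xmm1's low double to both halves
+//                              // of xmm0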
+// +// Mnemonic : MOVDDUP +// Supported forms : (2 forms) +// +// * MOVDDUP xmm, xmm [SSE3] +// * MOVDDUP m64, xmm [SSE3] +// +func (self *Program) MOVDDUP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVDDUP", 2, Operands { v0, v1 }) + // MOVDDUP xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVDDUP m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVDDUP") + } + return p +} + +// MOVDQ2Q performs "Move Quadword from XMM to MMX Technology Register". +// +// Mnemonic : MOVDQ2Q +// Supported forms : (1 form) +// +// * MOVDQ2Q xmm, mm [SSE2] +// +func (self *Program) MOVDQ2Q(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVDQ2Q", 2, Operands { v0, v1 }) + // MOVDQ2Q xmm, mm + if isXMM(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MOVDQ2Q") + } + return p +} + +// MOVDQA performs "Move Aligned Double Quadword". +// +// Mnemonic : MOVDQA +// Supported forms : (3 forms) +// +// * MOVDQA xmm, xmm [SSE2] +// * MOVDQA m128, xmm [SSE2] +// * MOVDQA xmm, m128 [SSE2] +// +func (self *Program) MOVDQA(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVDQA", 2, Operands { v0, v1 }) + // MOVDQA xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVDQA m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVDQA xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVDQA") + } + return p +} + +// MOVDQU performs "Move Unaligned Double Quadword". 
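+//
+// Sketch (assumptions as above, with the assumed Ptr memory helper):
+//
+//     p.MOVDQU(Ptr(RSI, 0), XMM0)    // unaligned 128-bit load
+//     p.MOVDQU(XMM0, Ptr(RDI, 0))    // unaligned 128-bit store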
+// +// Mnemonic : MOVDQU +// Supported forms : (3 forms) +// +// * MOVDQU xmm, xmm [SSE2] +// * MOVDQU m128, xmm [SSE2] +// * MOVDQU xmm, m128 [SSE2] +// +func (self *Program) MOVDQU(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVDQU", 2, Operands { v0, v1 }) + // MOVDQU xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVDQU m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVDQU xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVDQU") + } + return p +} + +// MOVHLPS performs "Move Packed Single-Precision Floating-Point Values High to Low". +// +// Mnemonic : MOVHLPS +// Supported forms : (1 form) +// +// * MOVHLPS xmm, xmm [SSE] +// +func (self *Program) MOVHLPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVHLPS", 2, Operands { v0, v1 }) + // MOVHLPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MOVHLPS") + } + return p +} + +// MOVHPD performs "Move High Packed Double-Precision Floating-Point Value". +// +// Mnemonic : MOVHPD +// Supported forms : (2 forms) +// +// * MOVHPD m64, xmm [SSE2] +// * MOVHPD xmm, m64 [SSE2] +// +func (self *Program) MOVHPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVHPD", 2, Operands { v0, v1 }) + // MOVHPD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVHPD xmm, m64 + if isXMM(v0) && isM64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x17) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVHPD") + } + return p +} + +// MOVHPS performs "Move High Packed Single-Precision Floating-Point Values". 
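+//
+// Sketch (assumptions as above):
+//
+//     p.MOVHPS(Ptr(RSI, 0), XMM0)    // load 64 bits into the high half of xmm0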
+// +// Mnemonic : MOVHPS +// Supported forms : (2 forms) +// +// * MOVHPS m64, xmm [SSE] +// * MOVHPS xmm, m64 [SSE] +// +func (self *Program) MOVHPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVHPS", 2, Operands { v0, v1 }) + // MOVHPS m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVHPS xmm, m64 + if isXMM(v0) && isM64(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x17) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVHPS") + } + return p +} + +// MOVL performs "Move". +// +// Mnemonic : MOV +// Supported forms : (5 forms) +// +// * MOVL imm32, r32 +// * MOVL r32, r32 +// * MOVL m32, r32 +// * MOVL imm32, m32 +// * MOVL r32, m32 +// +func (self *Program) MOVL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVL", 2, Operands { v0, v1 }) + // MOVL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc7) + m.emit(0xc0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xb8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // MOVL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x89) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x8b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc7) + m.mrsd(0, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // MOVL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x89) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVL") + } + return p +} + +// MOVLHPS performs "Move Packed Single-Precision Floating-Point Values Low to High". +// +// Mnemonic : MOVLHPS +// Supported forms : (1 form) +// +// * MOVLHPS xmm, xmm [SSE] +// +func (self *Program) MOVLHPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVLHPS", 2, Operands { v0, v1 }) + // MOVLHPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MOVLHPS") + } + return p +} + +// MOVLPD performs "Move Low Packed Double-Precision Floating-Point Value". 
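+//
+// Sketch (assumptions as above):
+//
+//     p.MOVLPD(Ptr(RSI, 0), XMM0)    // load 64 bits into the low half of xmm0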
+// +// Mnemonic : MOVLPD +// Supported forms : (2 forms) +// +// * MOVLPD m64, xmm [SSE2] +// * MOVLPD xmm, m64 [SSE2] +// +func (self *Program) MOVLPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVLPD", 2, Operands { v0, v1 }) + // MOVLPD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVLPD xmm, m64 + if isXMM(v0) && isM64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVLPD") + } + return p +} + +// MOVLPS performs "Move Low Packed Single-Precision Floating-Point Values". +// +// Mnemonic : MOVLPS +// Supported forms : (2 forms) +// +// * MOVLPS m64, xmm [SSE] +// * MOVLPS xmm, m64 [SSE] +// +func (self *Program) MOVLPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVLPS", 2, Operands { v0, v1 }) + // MOVLPS m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVLPS xmm, m64 + if isXMM(v0) && isM64(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVLPS") + } + return p +} + +// MOVMSKPD performs "Extract Packed Double-Precision Floating-Point Sign Mask". +// +// Mnemonic : MOVMSKPD +// Supported forms : (1 form) +// +// * MOVMSKPD xmm, r32 [SSE2] +// +func (self *Program) MOVMSKPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVMSKPD", 2, Operands { v0, v1 }) + // MOVMSKPD xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x50) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MOVMSKPD") + } + return p +} + +// MOVMSKPS performs "Extract Packed Single-Precision Floating-Point Sign Mask". +// +// Mnemonic : MOVMSKPS +// Supported forms : (1 form) +// +// * MOVMSKPS xmm, r32 [SSE] +// +func (self *Program) MOVMSKPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVMSKPS", 2, Operands { v0, v1 }) + // MOVMSKPS xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x50) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MOVMSKPS") + } + return p +} + +// MOVNTDQ performs "Store Double Quadword Using Non-Temporal Hint". 
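+//
+// This is a streaming (non-temporal) store: the 128-bit value is written with
+// a hint to bypass the caches, and such stores are weakly ordered, so code
+// that publishes the buffer to another thread conventionally ends with an
+// SFENCE. A hedged sketch, assuming Ptr, the register constants, and the
+// generated SFENCE wrapper:
+//
+//     p.MOVNTDQ(XMM2, Ptr(RDI, 0))   // stream XMM2 to [RDI], cache-bypassing
+//     p.SFENCE()                     // order the streaming store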
+// +// Mnemonic : MOVNTDQ +// Supported forms : (1 form) +// +// * MOVNTDQ xmm, m128 [SSE2] +// +func (self *Program) MOVNTDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTDQ", 2, Operands { v0, v1 }) + // MOVNTDQ xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xe7) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTDQ") + } + return p +} + +// MOVNTDQA performs "Load Double Quadword Non-Temporal Aligned Hint". +// +// Mnemonic : MOVNTDQA +// Supported forms : (1 form) +// +// * MOVNTDQA m128, xmm [SSE4.1] +// +func (self *Program) MOVNTDQA(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTDQA", 2, Operands { v0, v1 }) + // MOVNTDQA m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTDQA") + } + return p +} + +// MOVNTIL performs "Store Doubleword Using Non-Temporal Hint". +// +// Mnemonic : MOVNTI +// Supported forms : (1 form) +// +// * MOVNTIL r32, m32 [SSE2] +// +func (self *Program) MOVNTIL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTIL", 2, Operands { v0, v1 }) + // MOVNTIL r32, m32 + if isReg32(v0) && isM32(v1) { + self.require(ISA_SSE2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc3) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTIL") + } + return p +} + +// MOVNTIQ performs "Store Doubleword Using Non-Temporal Hint". +// +// Mnemonic : MOVNTI +// Supported forms : (1 form) +// +// * MOVNTIQ r64, m64 [SSE2] +// +func (self *Program) MOVNTIQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTIQ", 2, Operands { v0, v1 }) + // MOVNTIQ r64, m64 + if isReg64(v0) && isM64(v1) { + self.require(ISA_SSE2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0xc3) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTIQ") + } + return p +} + +// MOVNTPD performs "Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint". +// +// Mnemonic : MOVNTPD +// Supported forms : (1 form) +// +// * MOVNTPD xmm, m128 [SSE2] +// +func (self *Program) MOVNTPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTPD", 2, Operands { v0, v1 }) + // MOVNTPD xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTPD") + } + return p +} + +// MOVNTPS performs "Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint". 
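+//
+// Like the other 16-byte non-temporal stores, this form requires the memory
+// operand to be 16-byte aligned; a misaligned address faults at run time. A
+// minimal sketch (operand helpers assumed):
+//
+//     p.MOVNTPS(XMM3, Ptr(RDI, 0))   // [RDI] must be 16-byte aligned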
+// +// Mnemonic : MOVNTPS +// Supported forms : (1 form) +// +// * MOVNTPS xmm, m128 [SSE] +// +func (self *Program) MOVNTPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTPS", 2, Operands { v0, v1 }) + // MOVNTPS xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTPS") + } + return p +} + +// MOVNTQ performs "Store of Quadword Using Non-Temporal Hint". +// +// Mnemonic : MOVNTQ +// Supported forms : (1 form) +// +// * MOVNTQ mm, m64 [MMX+] +// +func (self *Program) MOVNTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTQ", 2, Operands { v0, v1 }) + // MOVNTQ mm, m64 + if isMM(v0) && isM64(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xe7) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTQ") + } + return p +} + +// MOVNTSD performs "Store Scalar Double-Precision Floating-Point Values Using Non-Temporal Hint". +// +// Mnemonic : MOVNTSD +// Supported forms : (1 form) +// +// * MOVNTSD xmm, m64 [SSE4A] +// +func (self *Program) MOVNTSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTSD", 2, Operands { v0, v1 }) + // MOVNTSD xmm, m64 + if isXMM(v0) && isM64(v1) { + self.require(ISA_SSE4A) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTSD") + } + return p +} + +// MOVNTSS performs "Store Scalar Single-Precision Floating-Point Values Using Non-Temporal Hint". +// +// Mnemonic : MOVNTSS +// Supported forms : (1 form) +// +// * MOVNTSS xmm, m32 [SSE4A] +// +func (self *Program) MOVNTSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTSS", 2, Operands { v0, v1 }) + // MOVNTSS xmm, m32 + if isXMM(v0) && isM32(v1) { + self.require(ISA_SSE4A) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTSS") + } + return p +} + +// MOVQ performs "Move". 
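+//
+// MOVQ is the 64-bit move workhorse and, as the forms below show, spans
+// general-purpose, MMX and XMM operands. Note that an imm32 source is
+// sign-extended to 64 bits, while a full imm64 uses the longer B8+r encoding.
+// A hedged sketch, assuming plain Go integers are accepted as immediates and
+// the usual register constants:
+//
+//     p.MOVQ(int64(0x1122334455667788), RAX)    // imm64 load via B8+r
+//     p.MOVQ(RAX, XMM0)                         // GPR to XMM, 66 REX.W 0F 6E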
+// +// Mnemonic : MOV +// Supported forms : (16 forms) +// +// * MOVQ imm32, r64 +// * MOVQ imm64, r64 +// * MOVQ r64, r64 +// * MOVQ m64, r64 +// * MOVQ imm32, m64 +// * MOVQ r64, m64 +// * MOVQ mm, r64 [MMX] +// * MOVQ r64, mm [MMX] +// * MOVQ mm, mm [MMX] +// * MOVQ m64, mm [MMX] +// * MOVQ mm, m64 [MMX] +// * MOVQ xmm, r64 [SSE2] +// * MOVQ r64, xmm [SSE2] +// * MOVQ xmm, xmm [SSE2] +// * MOVQ m64, xmm [SSE2] +// * MOVQ xmm, m64 [SSE2] +// +func (self *Program) MOVQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVQ", 2, Operands { v0, v1 }) + // MOVQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc7) + m.emit(0xc0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // MOVQ imm64, r64 + if isImm64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xb8 | lcode(v[1])) + m.imm8(toImmAny(v[0])) + }) + } + // MOVQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x89) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x8b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc7) + m.mrsd(0, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // MOVQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x89) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // MOVQ mm, r64 + if isMM(v0) && isReg64(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVQ r64, mm + if isReg64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVQ m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x6e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + 
}) + } + // MOVQ mm, m64 + if isMM(v0) && isM64(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0x7e) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // MOVQ xmm, r64 + if isXMM(v0) && isReg64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVQ r64, xmm + if isReg64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xd6) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVQ m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x7e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x6e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVQ xmm, m64 + if isXMM(v0) && isM64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xd6) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0x7e) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVQ") + } + return p +} + +// MOVQ2DQ performs "Move Quadword from MMX Technology to XMM Register". +// +// Mnemonic : MOVQ2DQ +// Supported forms : (1 form) +// +// * MOVQ2DQ mm, xmm [SSE2] +// +func (self *Program) MOVQ2DQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVQ2DQ", 2, Operands { v0, v1 }) + // MOVQ2DQ mm, xmm + if isMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MOVQ2DQ") + } + return p +} + +// MOVSBL performs "Move with Sign-Extension". 
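+//
+// The B...L suffix pair reads "sign-extend a byte into a 32-bit register"
+// (Intel MOVSX). For example, assuming the register constants:
+//
+//     p.MOVSBL(AL, EBX)    // EBX = int32(int8(AL))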
+// +// Mnemonic : MOVSX +// Supported forms : (2 forms) +// +// * MOVSBL r8, r32 +// * MOVSBL m8, r32 +// +func (self *Program) MOVSBL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSBL", 2, Operands { v0, v1 }) + // MOVSBL r8, r32 + if isReg8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVSBL m8, r32 + if isM8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbe) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSBL") + } + return p +} + +// MOVSBQ performs "Move with Sign-Extension". +// +// Mnemonic : MOVSX +// Supported forms : (2 forms) +// +// * MOVSBQ r8, r64 +// * MOVSBQ m8, r64 +// +func (self *Program) MOVSBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSBQ", 2, Operands { v0, v1 }) + // MOVSBQ r8, r64 + if isReg8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVSBQ m8, r64 + if isM8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xbe) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSBQ") + } + return p +} + +// MOVSBW performs "Move with Sign-Extension". +// +// Mnemonic : MOVSX +// Supported forms : (2 forms) +// +// * MOVSBW r8, r16 +// * MOVSBW m8, r16 +// +func (self *Program) MOVSBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSBW", 2, Operands { v0, v1 }) + // MOVSBW r8, r16 + if isReg8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVSBW m8, r16 + if isM8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbe) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSBW") + } + return p +} + +// MOVSD performs "Move Scalar Double-Precision Floating-Point Value". 
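+//
+// Note the asymmetry between the forms: the register-register move copies
+// only the low 64 bits and preserves the upper half of the destination,
+// while the m64 load zeroes the upper 64 bits. A minimal sketch (operand
+// helpers assumed):
+//
+//     p.MOVSD(XMM1, XMM0)           // XMM0[63:0] = XMM1[63:0], upper kept
+//     p.MOVSD(Ptr(RAX, 0), XMM0)    // XMM0[63:0] = [RAX], upper zeroed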
+// +// Mnemonic : MOVSD +// Supported forms : (3 forms) +// +// * MOVSD xmm, xmm [SSE2] +// * MOVSD m64, xmm [SSE2] +// * MOVSD xmm, m64 [SSE2] +// +func (self *Program) MOVSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSD", 2, Operands { v0, v1 }) + // MOVSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVSD xmm, m64 + if isXMM(v0) && isM64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSD") + } + return p +} + +// MOVSHDUP performs "Move Packed Single-FP High and Duplicate". +// +// Mnemonic : MOVSHDUP +// Supported forms : (2 forms) +// +// * MOVSHDUP xmm, xmm [SSE3] +// * MOVSHDUP m128, xmm [SSE3] +// +func (self *Program) MOVSHDUP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSHDUP", 2, Operands { v0, v1 }) + // MOVSHDUP xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVSHDUP m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSHDUP") + } + return p +} + +// MOVSLDUP performs "Move Packed Single-FP Low and Duplicate". +// +// Mnemonic : MOVSLDUP +// Supported forms : (2 forms) +// +// * MOVSLDUP xmm, xmm [SSE3] +// * MOVSLDUP m128, xmm [SSE3] +// +func (self *Program) MOVSLDUP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSLDUP", 2, Operands { v0, v1 }) + // MOVSLDUP xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVSLDUP m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSLDUP") + } + return p +} + +// MOVSLQ performs "Move Doubleword to Quadword with Sign-Extension". 
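+//
+// This is Intel's MOVSXD under its AT&T-style name: it sign-extends a 32-bit
+// source into a 64-bit register. For example, assuming the register
+// constants:
+//
+//     p.MOVSLQ(EAX, RCX)    // RCX = int64(int32(EAX))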
+// +// Mnemonic : MOVSXD +// Supported forms : (2 forms) +// +// * MOVSLQ r32, r64 +// * MOVSLQ m32, r64 +// +func (self *Program) MOVSLQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSLQ", 2, Operands { v0, v1 }) + // MOVSLQ r32, r64 + if isReg32(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x63) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVSLQ m32, r64 + if isM32(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x63) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSLQ") + } + return p +} + +// MOVSS performs "Move Scalar Single-Precision Floating-Point Values". +// +// Mnemonic : MOVSS +// Supported forms : (3 forms) +// +// * MOVSS xmm, xmm [SSE] +// * MOVSS m32, xmm [SSE] +// * MOVSS xmm, m32 [SSE] +// +func (self *Program) MOVSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSS", 2, Operands { v0, v1 }) + // MOVSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVSS xmm, m32 + if isXMM(v0) && isM32(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSS") + } + return p +} + +// MOVSWL performs "Move with Sign-Extension". +// +// Mnemonic : MOVSX +// Supported forms : (2 forms) +// +// * MOVSWL r16, r32 +// * MOVSWL m16, r32 +// +func (self *Program) MOVSWL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSWL", 2, Operands { v0, v1 }) + // MOVSWL r16, r32 + if isReg16(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbf) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVSWL m16, r32 + if isM16(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbf) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSWL") + } + return p +} + +// MOVSWQ performs "Move with Sign-Extension". 
+// +// Mnemonic : MOVSX +// Supported forms : (2 forms) +// +// * MOVSWQ r16, r64 +// * MOVSWQ m16, r64 +// +func (self *Program) MOVSWQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSWQ", 2, Operands { v0, v1 }) + // MOVSWQ r16, r64 + if isReg16(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xbf) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVSWQ m16, r64 + if isM16(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xbf) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSWQ") + } + return p +} + +// MOVUPD performs "Move Unaligned Packed Double-Precision Floating-Point Values". +// +// Mnemonic : MOVUPD +// Supported forms : (3 forms) +// +// * MOVUPD xmm, xmm [SSE2] +// * MOVUPD m128, xmm [SSE2] +// * MOVUPD xmm, m128 [SSE2] +// +func (self *Program) MOVUPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVUPD", 2, Operands { v0, v1 }) + // MOVUPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVUPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVUPD xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVUPD") + } + return p +} + +// MOVUPS performs "Move Unaligned Packed Single-Precision Floating-Point Values". 
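+//
+// Unlike the aligned MOVAPS, MOVUPS accepts 16-byte memory operands at any
+// alignment, making it the safe default when alignment is unknown. A minimal
+// sketch (operand helpers assumed):
+//
+//     p.MOVUPS(Ptr(RSI, 3), XMM0)    // unaligned 16-byte load is allowed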
+// +// Mnemonic : MOVUPS +// Supported forms : (3 forms) +// +// * MOVUPS xmm, xmm [SSE] +// * MOVUPS m128, xmm [SSE] +// * MOVUPS xmm, m128 [SSE] +// +func (self *Program) MOVUPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVUPS", 2, Operands { v0, v1 }) + // MOVUPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVUPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVUPS xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVUPS") + } + return p +} + +// MOVW performs "Move". +// +// Mnemonic : MOV +// Supported forms : (5 forms) +// +// * MOVW imm16, r16 +// * MOVW r16, r16 +// * MOVW m16, r16 +// * MOVW imm16, m16 +// * MOVW r16, m16 +// +func (self *Program) MOVW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVW", 2, Operands { v0, v1 }) + // MOVW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc7) + m.emit(0xc0 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xb8 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // MOVW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x89) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x8b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc7) + m.mrsd(0, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // MOVW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x89) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVW") + } + return p +} + +// MOVZBL performs "Move with Zero-Extend". 
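+//
+// MOVZBL zero-extends a byte into a 32-bit register, and since writing a
+// 32-bit register also clears bits 63:32, it doubles as a byte-to-64-bit
+// zero extension. For example, assuming the register constants:
+//
+//     p.MOVZBL(AL, EAX)    // RAX = uint64(uint8(AL))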
+// +// Mnemonic : MOVZX +// Supported forms : (2 forms) +// +// * MOVZBL r8, r32 +// * MOVZBL m8, r32 +// +func (self *Program) MOVZBL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVZBL", 2, Operands { v0, v1 }) + // MOVZBL r8, r32 + if isReg8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVZBL m8, r32 + if isM8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xb6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVZBL") + } + return p +} + +// MOVZBQ performs "Move with Zero-Extend". +// +// Mnemonic : MOVZX +// Supported forms : (2 forms) +// +// * MOVZBQ r8, r64 +// * MOVZBQ m8, r64 +// +func (self *Program) MOVZBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVZBQ", 2, Operands { v0, v1 }) + // MOVZBQ r8, r64 + if isReg8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVZBQ m8, r64 + if isM8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xb6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVZBQ") + } + return p +} + +// MOVZBW performs "Move with Zero-Extend". +// +// Mnemonic : MOVZX +// Supported forms : (2 forms) +// +// * MOVZBW r8, r16 +// * MOVZBW m8, r16 +// +func (self *Program) MOVZBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVZBW", 2, Operands { v0, v1 }) + // MOVZBW r8, r16 + if isReg8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVZBW m8, r16 + if isM8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xb6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVZBW") + } + return p +} + +// MOVZWL performs "Move with Zero-Extend". +// +// Mnemonic : MOVZX +// Supported forms : (2 forms) +// +// * MOVZWL r16, r32 +// * MOVZWL m16, r32 +// +func (self *Program) MOVZWL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVZWL", 2, Operands { v0, v1 }) + // MOVZWL r16, r32 + if isReg16(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVZWL m16, r32 + if isM16(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xb7) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVZWL") + } + return p +} + +// MOVZWQ performs "Move with Zero-Extend". 
+// +// Mnemonic : MOVZX +// Supported forms : (2 forms) +// +// * MOVZWQ r16, r64 +// * MOVZWQ m16, r64 +// +func (self *Program) MOVZWQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVZWQ", 2, Operands { v0, v1 }) + // MOVZWQ r16, r64 + if isReg16(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVZWQ m16, r64 + if isM16(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xb7) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVZWQ") + } + return p +} + +// MPSADBW performs "Compute Multiple Packed Sums of Absolute Difference". +// +// Mnemonic : MPSADBW +// Supported forms : (2 forms) +// +// * MPSADBW imm8, xmm, xmm [SSE4.1] +// * MPSADBW imm8, m128, xmm [SSE4.1] +// +func (self *Program) MPSADBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("MPSADBW", 3, Operands { v0, v1, v2 }) + // MPSADBW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x42) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // MPSADBW imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x42) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MPSADBW") + } + return p +} + +// MULB performs "Unsigned Multiply". +// +// Mnemonic : MUL +// Supported forms : (2 forms) +// +// * MULB r8 +// * MULB m8 +// +func (self *Program) MULB(v0 interface{}) *Instruction { + p := self.alloc("MULB", 1, Operands { v0 }) + // MULB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0xf6) + m.emit(0xe0 | lcode(v[0])) + }) + } + // MULB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf6) + m.mrsd(4, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULB") + } + return p +} + +// MULL performs "Unsigned Multiply". +// +// Mnemonic : MUL +// Supported forms : (2 forms) +// +// * MULL r32 +// * MULL m32 +// +func (self *Program) MULL(v0 interface{}) *Instruction { + p := self.alloc("MULL", 1, Operands { v0 }) + // MULL r32 + if isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xe0 | lcode(v[0])) + }) + } + // MULL m32 + if isM32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(4, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULL") + } + return p +} + +// MULPD performs "Multiply Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : MULPD +// Supported forms : (2 forms) +// +// * MULPD xmm, xmm [SSE2] +// * MULPD m128, xmm [SSE2] +// +func (self *Program) MULPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MULPD", 2, Operands { v0, v1 }) + // MULPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MULPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULPD") + } + return p +} + +// MULPS performs "Multiply Packed Single-Precision Floating-Point Values". +// +// Mnemonic : MULPS +// Supported forms : (2 forms) +// +// * MULPS xmm, xmm [SSE] +// * MULPS m128, xmm [SSE] +// +func (self *Program) MULPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MULPS", 2, Operands { v0, v1 }) + // MULPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MULPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULPS") + } + return p +} + +// MULQ performs "Unsigned Multiply". +// +// Mnemonic : MUL +// Supported forms : (2 forms) +// +// * MULQ r64 +// * MULQ m64 +// +func (self *Program) MULQ(v0 interface{}) *Instruction { + p := self.alloc("MULQ", 1, Operands { v0 }) + // MULQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0xf7) + m.emit(0xe0 | lcode(v[0])) + }) + } + // MULQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0xf7) + m.mrsd(4, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULQ") + } + return p +} + +// MULSD performs "Multiply Scalar Double-Precision Floating-Point Values". 
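+//
+// The scalar form multiplies only the low double of each operand and leaves
+// the upper 64 bits of the destination unchanged. A minimal sketch (operand
+// helpers assumed):
+//
+//     p.MULSD(XMM1, XMM0)    // XMM0[63:0] *= XMM1[63:0], upper bits kept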
+// +// Mnemonic : MULSD +// Supported forms : (2 forms) +// +// * MULSD xmm, xmm [SSE2] +// * MULSD m64, xmm [SSE2] +// +func (self *Program) MULSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MULSD", 2, Operands { v0, v1 }) + // MULSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MULSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULSD") + } + return p +} + +// MULSS performs "Multiply Scalar Single-Precision Floating-Point Values". +// +// Mnemonic : MULSS +// Supported forms : (2 forms) +// +// * MULSS xmm, xmm [SSE] +// * MULSS m32, xmm [SSE] +// +func (self *Program) MULSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MULSS", 2, Operands { v0, v1 }) + // MULSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MULSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULSS") + } + return p +} + +// MULW performs "Unsigned Multiply". +// +// Mnemonic : MUL +// Supported forms : (2 forms) +// +// * MULW r16 +// * MULW m16 +// +func (self *Program) MULW(v0 interface{}) *Instruction { + p := self.alloc("MULW", 1, Operands { v0 }) + // MULW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xe0 | lcode(v[0])) + }) + } + // MULW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(4, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULW") + } + return p +} + +// MULXL performs "Unsigned Multiply Without Affecting Flags". 
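+//
+// MULX is the BMI2 flag-preserving multiply: the implicit second source is
+// EDX (RDX for MULXQ) and EFLAGS is left untouched. Judging from the VEX
+// encoding below, the operand order here is (source, low-half destination,
+// high-half destination); a hedged sketch, assuming the register constants:
+//
+//     p.MULXL(ECX, EAX, EBX)    // EBX:EAX = EDX * ECX, flags preserved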
+// +// Mnemonic : MULX +// Supported forms : (2 forms) +// +// * MULXL r32, r32, r32 [BMI2] +// * MULXL m32, r32, r32 [BMI2] +// +func (self *Program) MULXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("MULXL", 3, Operands { v0, v1, v2 }) + // MULXL r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7b ^ (hlcode(v[1]) << 3)) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // MULXL m32, r32, r32 + if isM32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x03, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULXL") + } + return p +} + +// MULXQ performs "Unsigned Multiply Without Affecting Flags". +// +// Mnemonic : MULX +// Supported forms : (2 forms) +// +// * MULXQ r64, r64, r64 [BMI2] +// * MULXQ m64, r64, r64 [BMI2] +// +func (self *Program) MULXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("MULXQ", 3, Operands { v0, v1, v2 }) + // MULXQ r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfb ^ (hlcode(v[1]) << 3)) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // MULXQ m64, r64, r64 + if isM64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x83, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULXQ") + } + return p +} + +// MWAIT performs "Monitor Wait". +// +// Mnemonic : MWAIT +// Supported forms : (1 form) +// +// * MWAIT [MONITOR] +// +func (self *Program) MWAIT() *Instruction { + p := self.alloc("MWAIT", 0, Operands { }) + // MWAIT + self.require(ISA_MONITOR) + p.domain = DomainMisc + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x01) + m.emit(0xc9) + }) + return p +} + +// MWAITX performs "Monitor Wait with Timeout". +// +// Mnemonic : MWAITX +// Supported forms : (1 form) +// +// * MWAITX [MONITORX] +// +func (self *Program) MWAITX() *Instruction { + p := self.alloc("MWAITX", 0, Operands { }) + // MWAITX + self.require(ISA_MONITORX) + p.domain = DomainMisc + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x01) + m.emit(0xfb) + }) + return p +} + +// NEGB performs "Two's Complement Negation". 
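+//
+// NEG computes the two's complement (0 - x) of its operand in place, setting
+// the arithmetic flags; CF is cleared only when the operand was zero. For
+// example, assuming the register constants:
+//
+//     p.NEGL(EAX)    // EAX = -EAX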
+// +// Mnemonic : NEG +// Supported forms : (2 forms) +// +// * NEGB r8 +// * NEGB m8 +// +func (self *Program) NEGB(v0 interface{}) *Instruction { + p := self.alloc("NEGB", 1, Operands { v0 }) + // NEGB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0xf6) + m.emit(0xd8 | lcode(v[0])) + }) + } + // NEGB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf6) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for NEGB") + } + return p +} + +// NEGL performs "Two's Complement Negation". +// +// Mnemonic : NEG +// Supported forms : (2 forms) +// +// * NEGL r32 +// * NEGL m32 +// +func (self *Program) NEGL(v0 interface{}) *Instruction { + p := self.alloc("NEGL", 1, Operands { v0 }) + // NEGL r32 + if isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xd8 | lcode(v[0])) + }) + } + // NEGL m32 + if isM32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for NEGL") + } + return p +} + +// NEGQ performs "Two's Complement Negation". +// +// Mnemonic : NEG +// Supported forms : (2 forms) +// +// * NEGQ r64 +// * NEGQ m64 +// +func (self *Program) NEGQ(v0 interface{}) *Instruction { + p := self.alloc("NEGQ", 1, Operands { v0 }) + // NEGQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0xf7) + m.emit(0xd8 | lcode(v[0])) + }) + } + // NEGQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0xf7) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for NEGQ") + } + return p +} + +// NEGW performs "Two's Complement Negation". +// +// Mnemonic : NEG +// Supported forms : (2 forms) +// +// * NEGW r16 +// * NEGW m16 +// +func (self *Program) NEGW(v0 interface{}) *Instruction { + p := self.alloc("NEGW", 1, Operands { v0 }) + // NEGW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xd8 | lcode(v[0])) + }) + } + // NEGW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for NEGW") + } + return p +} + +// NOP performs "No Operation". +// +// Mnemonic : NOP +// Supported forms : (1 form) +// +// * NOP +// +func (self *Program) NOP() *Instruction { + p := self.alloc("NOP", 0, Operands { }) + // NOP + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x90) + }) + return p +} + +// NOTB performs "One's Complement Negation". 
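+//
+// Unlike NEG, NOT simply flips every bit in place and leaves all flags
+// unchanged. For example, assuming the register constants:
+//
+//     p.NOTQ(RAX)    // RAX = ^RAX, EFLAGS untouched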
+// +// Mnemonic : NOT +// Supported forms : (2 forms) +// +// * NOTB r8 +// * NOTB m8 +// +func (self *Program) NOTB(v0 interface{}) *Instruction { + p := self.alloc("NOTB", 1, Operands { v0 }) + // NOTB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0xf6) + m.emit(0xd0 | lcode(v[0])) + }) + } + // NOTB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf6) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for NOTB") + } + return p +} + +// NOTL performs "One's Complement Negation". +// +// Mnemonic : NOT +// Supported forms : (2 forms) +// +// * NOTL r32 +// * NOTL m32 +// +func (self *Program) NOTL(v0 interface{}) *Instruction { + p := self.alloc("NOTL", 1, Operands { v0 }) + // NOTL r32 + if isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xd0 | lcode(v[0])) + }) + } + // NOTL m32 + if isM32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for NOTL") + } + return p +} + +// NOTQ performs "One's Complement Negation". +// +// Mnemonic : NOT +// Supported forms : (2 forms) +// +// * NOTQ r64 +// * NOTQ m64 +// +func (self *Program) NOTQ(v0 interface{}) *Instruction { + p := self.alloc("NOTQ", 1, Operands { v0 }) + // NOTQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0xf7) + m.emit(0xd0 | lcode(v[0])) + }) + } + // NOTQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0xf7) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for NOTQ") + } + return p +} + +// NOTW performs "One's Complement Negation". +// +// Mnemonic : NOT +// Supported forms : (2 forms) +// +// * NOTW r16 +// * NOTW m16 +// +func (self *Program) NOTW(v0 interface{}) *Instruction { + p := self.alloc("NOTW", 1, Operands { v0 }) + // NOTW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xd0 | lcode(v[0])) + }) + } + // NOTW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for NOTW") + } + return p +} + +// ORB performs "Logical Inclusive OR". 
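+//
+// The dedicated AL form below exists because it uses the one-byte accumulator
+// encoding (0C ib) instead of the generic 80 /1 form. For example, assuming
+// the register constants:
+//
+//     p.ORB(0x80, AL)    // set the top bit of AL via the short 0C encoding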
+// +// Mnemonic : OR +// Supported forms : (6 forms) +// +// * ORB imm8, al +// * ORB imm8, r8 +// * ORB r8, r8 +// * ORB m8, r8 +// * ORB imm8, m8 +// * ORB r8, m8 +// +func (self *Program) ORB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ORB", 2, Operands { v0, v1 }) + // ORB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0c) + m.imm1(toImmAny(v[0])) + }) + } + // ORB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0x80) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ORB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x08) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ORB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x0a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ORB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x80) + m.mrsd(1, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ORB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x08) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ORB") + } + return p +} + +// ORL performs "Logical Inclusive OR". 
+// +// Mnemonic : OR +// Supported forms : (8 forms) +// +// * ORL imm32, eax +// * ORL imm8, r32 +// * ORL imm32, r32 +// * ORL r32, r32 +// * ORL m32, r32 +// * ORL imm8, m32 +// * ORL imm32, m32 +// * ORL r32, m32 +// +func (self *Program) ORL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ORL", 2, Operands { v0, v1 }) + // ORL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0d) + m.imm4(toImmAny(v[0])) + }) + } + // ORL imm8, r32 + if isImm8Ext(v0, 4) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ORL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xc8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // ORL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x09) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ORL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ORL imm8, m32 + if isImm8Ext(v0, 4) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(1, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ORL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(1, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // ORL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x09) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ORL") + } + return p +} + +// ORPD performs "Bitwise Logical OR of Double-Precision Floating-Point Values". +// +// Mnemonic : ORPD +// Supported forms : (2 forms) +// +// * ORPD xmm, xmm [SSE2] +// * ORPD m128, xmm [SSE2] +// +func (self *Program) ORPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ORPD", 2, Operands { v0, v1 }) + // ORPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x56) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ORPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x56) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ORPD") + } + return p +} + +// ORPS performs "Bitwise Logical OR of Single-Precision Floating-Point Values". 
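+//
+// ORPS ORs the raw bit patterns of packed singles; a common use is forcing
+// sign bits with a prepared 128-bit mask. A minimal sketch (operand helpers
+// assumed):
+//
+//     p.ORPS(Ptr(RSI, 0), XMM0)    // XMM0 |= mask loaded from [RSI]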
+// +// Mnemonic : ORPS +// Supported forms : (2 forms) +// +// * ORPS xmm, xmm [SSE] +// * ORPS m128, xmm [SSE] +// +func (self *Program) ORPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ORPS", 2, Operands { v0, v1 }) + // ORPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x56) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ORPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x56) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ORPS") + } + return p +} + +// ORQ performs "Logical Inclusive OR". +// +// Mnemonic : OR +// Supported forms : (8 forms) +// +// * ORQ imm32, rax +// * ORQ imm8, r64 +// * ORQ imm32, r64 +// * ORQ r64, r64 +// * ORQ m64, r64 +// * ORQ imm8, m64 +// * ORQ imm32, m64 +// * ORQ r64, m64 +// +func (self *Program) ORQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ORQ", 2, Operands { v0, v1 }) + // ORQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x0d) + m.imm4(toImmAny(v[0])) + }) + } + // ORQ imm8, r64 + if isImm8Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x83) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ORQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x81) + m.emit(0xc8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // ORQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x09) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ORQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ORQ imm8, m64 + if isImm8Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x83) + m.mrsd(1, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ORQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x81) + m.mrsd(1, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // ORQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x09) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ORQ") + } + return p +} + +// ORW performs "Logical Inclusive OR". 
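+//
+// The 16-bit forms below all emit the 0x66 operand-size prefix before the
+// shared OR opcodes, as the encoders show. A minimal sketch, given a
+// *Program p:
+//
+//     p.ORW(CX, AX)    // ORW r16, r16 : ax |= cx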
+// +// Mnemonic : OR +// Supported forms : (8 forms) +// +// * ORW imm16, ax +// * ORW imm8, r16 +// * ORW imm16, r16 +// * ORW r16, r16 +// * ORW m16, r16 +// * ORW imm8, m16 +// * ORW imm16, m16 +// * ORW r16, m16 +// +func (self *Program) ORW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ORW", 2, Operands { v0, v1 }) + // ORW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x0d) + m.imm2(toImmAny(v[0])) + }) + } + // ORW imm8, r16 + if isImm8Ext(v0, 2) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ORW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xc8 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // ORW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x09) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ORW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ORW imm8, m16 + if isImm8Ext(v0, 2) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(1, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ORW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(1, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // ORW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x09) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ORW") + } + return p +} + +// PABSB performs "Packed Absolute Value of Byte Integers". 
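+//
+// Operand types select the form throughout this family: mm operands pick
+// the MMX-register encoding, while xmm operands add the 0x66 prefix; either
+// way self.require(ISA_SSSE3) records the ISA demand. A sketch, given a
+// *Program p:
+//
+//     p.PABSB(XMM1, XMM0)    // xmm0[i] = |int8(xmm1[i])| for every byte lane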
+// +// Mnemonic : PABSB +// Supported forms : (4 forms) +// +// * PABSB mm, mm [SSSE3] +// * PABSB m64, mm [SSSE3] +// * PABSB xmm, xmm [SSSE3] +// * PABSB m128, xmm [SSSE3] +// +func (self *Program) PABSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PABSB", 2, Operands { v0, v1 }) + // PABSB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PABSB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PABSB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PABSB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PABSB") + } + return p +} + +// PABSD performs "Packed Absolute Value of Doubleword Integers". +// +// Mnemonic : PABSD +// Supported forms : (4 forms) +// +// * PABSD mm, mm [SSSE3] +// * PABSD m64, mm [SSSE3] +// * PABSD xmm, xmm [SSSE3] +// * PABSD m128, xmm [SSSE3] +// +func (self *Program) PABSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PABSD", 2, Operands { v0, v1 }) + // PABSD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PABSD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PABSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PABSD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PABSD") + } + return p +} + +// PABSW performs "Packed Absolute Value of Word Integers". 
+// +// Mnemonic : PABSW +// Supported forms : (4 forms) +// +// * PABSW mm, mm [SSSE3] +// * PABSW m64, mm [SSSE3] +// * PABSW xmm, xmm [SSSE3] +// * PABSW m128, xmm [SSSE3] +// +func (self *Program) PABSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PABSW", 2, Operands { v0, v1 }) + // PABSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PABSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PABSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PABSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PABSW") + } + return p +} + +// PACKSSDW performs "Pack Doublewords into Words with Signed Saturation". +// +// Mnemonic : PACKSSDW +// Supported forms : (4 forms) +// +// * PACKSSDW mm, mm [MMX] +// * PACKSSDW m64, mm [MMX] +// * PACKSSDW xmm, xmm [SSE2] +// * PACKSSDW m128, xmm [SSE2] +// +func (self *Program) PACKSSDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PACKSSDW", 2, Operands { v0, v1 }) + // PACKSSDW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PACKSSDW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PACKSSDW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PACKSSDW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PACKSSDW") + } + return p +} + +// PACKSSWB performs "Pack Words into Bytes with Signed Saturation". 
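+//
+// PACKSSWB narrows each signed word to a signed byte with saturation:
+// values outside [-128, 127] clamp to the nearest bound instead of
+// wrapping. A sketch, given a *Program p:
+//
+//     p.PACKSSWB(XMM1, XMM0)    // low 8 bytes from xmm0's words, high 8 from xmm1's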
+// +// Mnemonic : PACKSSWB +// Supported forms : (4 forms) +// +// * PACKSSWB mm, mm [MMX] +// * PACKSSWB m64, mm [MMX] +// * PACKSSWB xmm, xmm [SSE2] +// * PACKSSWB m128, xmm [SSE2] +// +func (self *Program) PACKSSWB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PACKSSWB", 2, Operands { v0, v1 }) + // PACKSSWB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x63) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PACKSSWB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x63) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PACKSSWB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x63) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PACKSSWB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x63) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PACKSSWB") + } + return p +} + +// PACKUSDW performs "Pack Doublewords into Words with Unsigned Saturation". +// +// Mnemonic : PACKUSDW +// Supported forms : (2 forms) +// +// * PACKUSDW xmm, xmm [SSE4.1] +// * PACKUSDW m128, xmm [SSE4.1] +// +func (self *Program) PACKUSDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PACKUSDW", 2, Operands { v0, v1 }) + // PACKUSDW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PACKUSDW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x2b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PACKUSDW") + } + return p +} + +// PACKUSWB performs "Pack Words into Bytes with Unsigned Saturation". 
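+//
+// PACKUSWB is the unsigned counterpart of PACKSSWB: each signed word clamps
+// to [0, 255] on the way down, so negative inputs become zero. A sketch,
+// given a *Program p:
+//
+//     p.PACKUSWB(XMM1, XMM0)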
+// +// Mnemonic : PACKUSWB +// Supported forms : (4 forms) +// +// * PACKUSWB mm, mm [MMX] +// * PACKUSWB m64, mm [MMX] +// * PACKUSWB xmm, xmm [SSE2] +// * PACKUSWB m128, xmm [SSE2] +// +func (self *Program) PACKUSWB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PACKUSWB", 2, Operands { v0, v1 }) + // PACKUSWB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x67) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PACKUSWB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x67) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PACKUSWB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x67) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PACKUSWB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x67) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PACKUSWB") + } + return p +} + +// PADDB performs "Add Packed Byte Integers". +// +// Mnemonic : PADDB +// Supported forms : (4 forms) +// +// * PADDB mm, mm [MMX] +// * PADDB m64, mm [MMX] +// * PADDB xmm, xmm [SSE2] +// * PADDB m128, xmm [SSE2] +// +func (self *Program) PADDB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PADDB", 2, Operands { v0, v1 }) + // PADDB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PADDB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PADDB") + } + return p +} + +// PADDD performs "Add Packed Doubleword Integers". 
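+//
+// Note for the quadword variant below: PADDQ has no MMX-era encoding, so
+// even its mm, mm form gates on ISA_SSE2, unlike the byte/word/dword adds.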
+// +// Mnemonic : PADDD +// Supported forms : (4 forms) +// +// * PADDD mm, mm [MMX] +// * PADDD m64, mm [MMX] +// * PADDD xmm, xmm [SSE2] +// * PADDD m128, xmm [SSE2] +// +func (self *Program) PADDD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PADDD", 2, Operands { v0, v1 }) + // PADDD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfe) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfe) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PADDD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfe) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfe) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PADDD") + } + return p +} + +// PADDQ performs "Add Packed Quadword Integers". +// +// Mnemonic : PADDQ +// Supported forms : (4 forms) +// +// * PADDQ mm, mm [SSE2] +// * PADDQ m64, mm [SSE2] +// * PADDQ xmm, xmm [SSE2] +// * PADDQ m128, xmm [SSE2] +// +func (self *Program) PADDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PADDQ", 2, Operands { v0, v1 }) + // PADDQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDQ m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd4) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PADDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd4) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PADDQ") + } + return p +} + +// PADDSB performs "Add Packed Signed Byte Integers with Signed Saturation". 
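+//
+// Unlike PADDB/PADDW/PADDD/PADDQ above, which wrap on overflow, the
+// saturating adds clamp: PADDSB/PADDSW to the signed ranges, and
+// PADDUSB/PADDUSW further below to [0, 255] and [0, 65535]. A sketch,
+// given a *Program p:
+//
+//     p.PADDSB(XMM1, XMM0)    // xmm0[i] = sat8(int8(xmm0[i]) + int8(xmm1[i]))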
+// +// Mnemonic : PADDSB +// Supported forms : (4 forms) +// +// * PADDSB mm, mm [MMX] +// * PADDSB m64, mm [MMX] +// * PADDSB xmm, xmm [SSE2] +// * PADDSB m128, xmm [SSE2] +// +func (self *Program) PADDSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PADDSB", 2, Operands { v0, v1 }) + // PADDSB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xec) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDSB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xec) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PADDSB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xec) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDSB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xec) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PADDSB") + } + return p +} + +// PADDSW performs "Add Packed Signed Word Integers with Signed Saturation". +// +// Mnemonic : PADDSW +// Supported forms : (4 forms) +// +// * PADDSW mm, mm [MMX] +// * PADDSW m64, mm [MMX] +// * PADDSW xmm, xmm [SSE2] +// * PADDSW m128, xmm [SSE2] +// +func (self *Program) PADDSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PADDSW", 2, Operands { v0, v1 }) + // PADDSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xed) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xed) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PADDSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xed) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xed) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PADDSW") + } + return p +} + +// PADDUSB performs "Add Packed Unsigned Byte Integers with Unsigned Saturation". 
+// +// Mnemonic : PADDUSB +// Supported forms : (4 forms) +// +// * PADDUSB mm, mm [MMX] +// * PADDUSB m64, mm [MMX] +// * PADDUSB xmm, xmm [SSE2] +// * PADDUSB m128, xmm [SSE2] +// +func (self *Program) PADDUSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PADDUSB", 2, Operands { v0, v1 }) + // PADDUSB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDUSB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xdc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PADDUSB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDUSB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xdc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PADDUSB") + } + return p +} + +// PADDUSW performs "Add Packed Unsigned Word Integers with Unsigned Saturation". +// +// Mnemonic : PADDUSW +// Supported forms : (4 forms) +// +// * PADDUSW mm, mm [MMX] +// * PADDUSW m64, mm [MMX] +// * PADDUSW xmm, xmm [SSE2] +// * PADDUSW m128, xmm [SSE2] +// +func (self *Program) PADDUSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PADDUSW", 2, Operands { v0, v1 }) + // PADDUSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDUSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xdd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PADDUSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDUSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xdd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PADDUSW") + } + return p +} + +// PADDW performs "Add Packed Word Integers". 
+// +// Mnemonic : PADDW +// Supported forms : (4 forms) +// +// * PADDW mm, mm [MMX] +// * PADDW m64, mm [MMX] +// * PADDW xmm, xmm [SSE2] +// * PADDW m128, xmm [SSE2] +// +func (self *Program) PADDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PADDW", 2, Operands { v0, v1 }) + // PADDW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PADDW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PADDW") + } + return p +} + +// PALIGNR performs "Packed Align Right". +// +// Mnemonic : PALIGNR +// Supported forms : (4 forms) +// +// * PALIGNR imm8, mm, mm [SSSE3] +// * PALIGNR imm8, m64, mm [SSSE3] +// * PALIGNR imm8, xmm, xmm [SSSE3] +// * PALIGNR imm8, m128, xmm [SSSE3] +// +func (self *Program) PALIGNR(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PALIGNR", 3, Operands { v0, v1, v2 }) + // PALIGNR imm8, mm, mm + if isImm8(v0) && isMM(v1) && isMM(v2) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PALIGNR imm8, m64, mm + if isImm8(v0) && isM64(v1) && isMM(v2) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0f) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // PALIGNR imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PALIGNR imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0f) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PALIGNR") + } + return p +} + +// PAND performs "Packed Bitwise Logical AND". 
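+//
+// PAND and PANDN round out the packed boolean operations (POR and PXOR are
+// their companions); note that PANDN computes NOT(dest) AND src, with the
+// complement applied to the destination operand. A sketch, given a
+// *Program p:
+//
+//     p.PAND(XMM1, XMM0)    // xmm0 &= xmm1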
+// +// Mnemonic : PAND +// Supported forms : (4 forms) +// +// * PAND mm, mm [MMX] +// * PAND m64, mm [MMX] +// * PAND xmm, xmm [SSE2] +// * PAND m128, xmm [SSE2] +// +func (self *Program) PAND(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PAND", 2, Operands { v0, v1 }) + // PAND mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PAND m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xdb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PAND xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PAND m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xdb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PAND") + } + return p +} + +// PANDN performs "Packed Bitwise Logical AND NOT". +// +// Mnemonic : PANDN +// Supported forms : (4 forms) +// +// * PANDN mm, mm [MMX] +// * PANDN m64, mm [MMX] +// * PANDN xmm, xmm [SSE2] +// * PANDN m128, xmm [SSE2] +// +func (self *Program) PANDN(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PANDN", 2, Operands { v0, v1 }) + // PANDN mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PANDN m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xdf) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PANDN xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PANDN m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xdf) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PANDN") + } + return p +} + +// PAUSE performs "Spin Loop Hint". +// +// Mnemonic : PAUSE +// Supported forms : (1 form) +// +// * PAUSE +// +func (self *Program) PAUSE() *Instruction { + p := self.alloc("PAUSE", 0, Operands { }) + // PAUSE + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.emit(0x90) + }) + return p +} + +// PAVGB performs "Average Packed Byte Integers". 
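+//
+// PAVGB computes (a + b + 1) >> 1 per unsigned byte, i.e. the average
+// rounds up; the 3DNow! PAVGUSB further below is AMD's earlier equivalent
+// of the same operation. A sketch, given a *Program p:
+//
+//     p.PAVGB(XMM1, XMM0)    // xmm0[i] = (xmm0[i] + xmm1[i] + 1) >> 1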
+// +// Mnemonic : PAVGB +// Supported forms : (4 forms) +// +// * PAVGB mm, mm [MMX+] +// * PAVGB m64, mm [MMX+] +// * PAVGB xmm, xmm [SSE2] +// * PAVGB m128, xmm [SSE2] +// +func (self *Program) PAVGB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PAVGB", 2, Operands { v0, v1 }) + // PAVGB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe0) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PAVGB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PAVGB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe0) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PAVGB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PAVGB") + } + return p +} + +// PAVGUSB performs "Average Packed Byte Integers". +// +// Mnemonic : PAVGUSB +// Supported forms : (2 forms) +// +// * PAVGUSB mm, mm [3dnow!] +// * PAVGUSB m64, mm [3dnow!] +// +func (self *Program) PAVGUSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PAVGUSB", 2, Operands { v0, v1 }) + // PAVGUSB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xbf) + }) + } + // PAVGUSB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xbf) + }) + } + if p.len == 0 { + panic("invalid operands for PAVGUSB") + } + return p +} + +// PAVGW performs "Average Packed Word Integers". 
+// +// Mnemonic : PAVGW +// Supported forms : (4 forms) +// +// * PAVGW mm, mm [MMX+] +// * PAVGW m64, mm [MMX+] +// * PAVGW xmm, xmm [SSE2] +// * PAVGW m128, xmm [SSE2] +// +func (self *Program) PAVGW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PAVGW", 2, Operands { v0, v1 }) + // PAVGW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PAVGW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PAVGW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PAVGW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PAVGW") + } + return p +} + +// PBLENDVB performs "Variable Blend Packed Bytes". +// +// Mnemonic : PBLENDVB +// Supported forms : (2 forms) +// +// * PBLENDVB xmm0, xmm, xmm [SSE4.1] +// * PBLENDVB xmm0, m128, xmm [SSE4.1] +// +func (self *Program) PBLENDVB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PBLENDVB", 3, Operands { v0, v1, v2 }) + // PBLENDVB xmm0, xmm, xmm + if v0 == XMM0 && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // PBLENDVB xmm0, m128, xmm + if v0 == XMM0 && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x10) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PBLENDVB") + } + return p +} + +// PBLENDW performs "Blend Packed Words". 
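+//
+// The imm8 acts as a per-word mask: bit i set takes word i from the source,
+// bit i clear keeps the destination word. A sketch, given a *Program p:
+//
+//     p.PBLENDW(0b10101010, XMM1, XMM0)    // odd word lanes from xmm1, even kept from xmm0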
+// +// Mnemonic : PBLENDW +// Supported forms : (2 forms) +// +// * PBLENDW imm8, xmm, xmm [SSE4.1] +// * PBLENDW imm8, m128, xmm [SSE4.1] +// +func (self *Program) PBLENDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PBLENDW", 3, Operands { v0, v1, v2 }) + // PBLENDW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PBLENDW imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0e) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PBLENDW") + } + return p +} + +// PCLMULQDQ performs "Carry-Less Quadword Multiplication". +// +// Mnemonic : PCLMULQDQ +// Supported forms : (2 forms) +// +// * PCLMULQDQ imm8, xmm, xmm [PCLMULQDQ] +// * PCLMULQDQ imm8, m128, xmm [PCLMULQDQ] +// +func (self *Program) PCLMULQDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PCLMULQDQ", 3, Operands { v0, v1, v2 }) + // PCLMULQDQ imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_PCLMULQDQ) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x44) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PCLMULQDQ imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_PCLMULQDQ) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x44) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PCLMULQDQ") + } + return p +} + +// PCMPEQB performs "Compare Packed Byte Data for Equality". 
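+//
+// The packed equality compares write all-ones into each matching lane and
+// zeros elsewhere, which pairs naturally with PMOVMSKB to turn the lane
+// mask into a scalar bitmask. A sketch, given a *Program p:
+//
+//     p.PCMPEQB(XMM1, XMM0)    // xmm0[i] = xmm0[i] == xmm1[i] ? 0xff : 0x00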
+// +// Mnemonic : PCMPEQB +// Supported forms : (4 forms) +// +// * PCMPEQB mm, mm [MMX] +// * PCMPEQB m64, mm [MMX] +// * PCMPEQB xmm, xmm [SSE2] +// * PCMPEQB m128, xmm [SSE2] +// +func (self *Program) PCMPEQB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PCMPEQB", 2, Operands { v0, v1 }) + // PCMPEQB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x74) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPEQB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x74) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PCMPEQB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x74) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPEQB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x74) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPEQB") + } + return p +} + +// PCMPEQD performs "Compare Packed Doubleword Data for Equality". +// +// Mnemonic : PCMPEQD +// Supported forms : (4 forms) +// +// * PCMPEQD mm, mm [MMX] +// * PCMPEQD m64, mm [MMX] +// * PCMPEQD xmm, xmm [SSE2] +// * PCMPEQD m128, xmm [SSE2] +// +func (self *Program) PCMPEQD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PCMPEQD", 2, Operands { v0, v1 }) + // PCMPEQD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x76) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPEQD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x76) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PCMPEQD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x76) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPEQD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x76) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPEQD") + } + return p +} + +// PCMPEQQ performs "Compare Packed Quadword Data for Equality". 
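+//
+// The quadword compares arrived later than the rest of the family: PCMPEQQ
+// here gates on ISA_SSE4_1, PCMPGTQ further below on ISA_SSE4_2, and
+// neither has an mm form.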
+// +// Mnemonic : PCMPEQQ +// Supported forms : (2 forms) +// +// * PCMPEQQ xmm, xmm [SSE4.1] +// * PCMPEQQ m128, xmm [SSE4.1] +// +func (self *Program) PCMPEQQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PCMPEQQ", 2, Operands { v0, v1 }) + // PCMPEQQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x29) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPEQQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x29) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPEQQ") + } + return p +} + +// PCMPEQW performs "Compare Packed Word Data for Equality". +// +// Mnemonic : PCMPEQW +// Supported forms : (4 forms) +// +// * PCMPEQW mm, mm [MMX] +// * PCMPEQW m64, mm [MMX] +// * PCMPEQW xmm, xmm [SSE2] +// * PCMPEQW m128, xmm [SSE2] +// +func (self *Program) PCMPEQW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PCMPEQW", 2, Operands { v0, v1 }) + // PCMPEQW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x75) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPEQW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x75) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PCMPEQW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x75) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPEQW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x75) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPEQW") + } + return p +} + +// PCMPESTRI performs "Packed Compare Explicit Length Strings, Return Index". 
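+//
+// The explicit-length string compares take their element counts implicitly
+// in EAX and EDX and return the match index in ECX; the imm8 picks element
+// size, comparison mode, and polarity. A hedged sketch, given a *Program p:
+//
+//     p.PCMPESTRI(0x0c, XMM1, XMM0)    // equal-ordered search over unsigned bytes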
+// +// Mnemonic : PCMPESTRI +// Supported forms : (2 forms) +// +// * PCMPESTRI imm8, xmm, xmm [SSE4.2] +// * PCMPESTRI imm8, m128, xmm [SSE4.2] +// +func (self *Program) PCMPESTRI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PCMPESTRI", 3, Operands { v0, v1, v2 }) + // PCMPESTRI imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x61) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PCMPESTRI imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x61) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPESTRI") + } + return p +} + +// PCMPESTRM performs "Packed Compare Explicit Length Strings, Return Mask". +// +// Mnemonic : PCMPESTRM +// Supported forms : (2 forms) +// +// * PCMPESTRM imm8, xmm, xmm [SSE4.2] +// * PCMPESTRM imm8, m128, xmm [SSE4.2] +// +func (self *Program) PCMPESTRM(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PCMPESTRM", 3, Operands { v0, v1, v2 }) + // PCMPESTRM imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x60) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PCMPESTRM imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x60) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPESTRM") + } + return p +} + +// PCMPGTB performs "Compare Packed Signed Byte Integers for Greater Than". 
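+//
+// All PCMPGT* compares are signed; x86 offers no packed unsigned
+// greater-than, which callers typically synthesize from PMINU*/PMAXU* plus
+// an equality compare. A sketch, given a *Program p:
+//
+//     p.PCMPGTB(XMM1, XMM0)    // xmm0[i] = int8(xmm0[i]) > int8(xmm1[i]) ? 0xff : 0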
+// +// Mnemonic : PCMPGTB +// Supported forms : (4 forms) +// +// * PCMPGTB mm, mm [MMX] +// * PCMPGTB m64, mm [MMX] +// * PCMPGTB xmm, xmm [SSE2] +// * PCMPGTB m128, xmm [SSE2] +// +func (self *Program) PCMPGTB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PCMPGTB", 2, Operands { v0, v1 }) + // PCMPGTB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x64) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPGTB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x64) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PCMPGTB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x64) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPGTB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x64) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPGTB") + } + return p +} + +// PCMPGTD performs "Compare Packed Signed Doubleword Integers for Greater Than". +// +// Mnemonic : PCMPGTD +// Supported forms : (4 forms) +// +// * PCMPGTD mm, mm [MMX] +// * PCMPGTD m64, mm [MMX] +// * PCMPGTD xmm, xmm [SSE2] +// * PCMPGTD m128, xmm [SSE2] +// +func (self *Program) PCMPGTD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PCMPGTD", 2, Operands { v0, v1 }) + // PCMPGTD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x66) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPGTD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x66) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PCMPGTD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x66) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPGTD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x66) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPGTD") + } + return p +} + +// PCMPGTQ performs "Compare Packed Data for Greater Than". 
+// +// Mnemonic : PCMPGTQ +// Supported forms : (2 forms) +// +// * PCMPGTQ xmm, xmm [SSE4.2] +// * PCMPGTQ m128, xmm [SSE4.2] +// +func (self *Program) PCMPGTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PCMPGTQ", 2, Operands { v0, v1 }) + // PCMPGTQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x37) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPGTQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x37) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPGTQ") + } + return p +} + +// PCMPGTW performs "Compare Packed Signed Word Integers for Greater Than". +// +// Mnemonic : PCMPGTW +// Supported forms : (4 forms) +// +// * PCMPGTW mm, mm [MMX] +// * PCMPGTW m64, mm [MMX] +// * PCMPGTW xmm, xmm [SSE2] +// * PCMPGTW m128, xmm [SSE2] +// +func (self *Program) PCMPGTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PCMPGTW", 2, Operands { v0, v1 }) + // PCMPGTW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x65) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPGTW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x65) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PCMPGTW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x65) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPGTW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x65) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPGTW") + } + return p +} + +// PCMPISTRI performs "Packed Compare Implicit Length Strings, Return Index". 
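+//
+// The implicit-length string compares differ from PCMPESTRI/PCMPESTRM above
+// only in how length is found: elements are scanned up to a zero terminator
+// instead of using the EAX/EDX counts, and the result index still lands in
+// ECX. A sketch, given a *Program p:
+//
+//     p.PCMPISTRI(0x0c, XMM1, XMM0)    // equal-ordered search over unsigned bytes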
+// +// Mnemonic : PCMPISTRI +// Supported forms : (2 forms) +// +// * PCMPISTRI imm8, xmm, xmm [SSE4.2] +// * PCMPISTRI imm8, m128, xmm [SSE4.2] +// +func (self *Program) PCMPISTRI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PCMPISTRI", 3, Operands { v0, v1, v2 }) + // PCMPISTRI imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x63) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PCMPISTRI imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x63) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPISTRI") + } + return p +} + +// PCMPISTRM performs "Packed Compare Implicit Length Strings, Return Mask". +// +// Mnemonic : PCMPISTRM +// Supported forms : (2 forms) +// +// * PCMPISTRM imm8, xmm, xmm [SSE4.2] +// * PCMPISTRM imm8, m128, xmm [SSE4.2] +// +func (self *Program) PCMPISTRM(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PCMPISTRM", 3, Operands { v0, v1, v2 }) + // PCMPISTRM imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x62) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PCMPISTRM imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x62) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPISTRM") + } + return p +} + +// PDEP performs "Parallel Bits Deposit". 
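+//
+// Reading the VEX encoding below: v2 is the destination (ModRM.reg), v1 the
+// value source (VEX.vvvv), and v0 the bit mask (ModRM.rm). A hedged sketch,
+// given a *Program p:
+//
+//     p.PDEP(RCX, RBX, RAX)    // rax = low bits of rbx deposited under mask rcx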
+// +// Mnemonic : PDEP +// Supported forms : (4 forms) +// +// * PDEP r32, r32, r32 [BMI2] +// * PDEP m32, r32, r32 [BMI2] +// * PDEP r64, r64, r64 [BMI2] +// * PDEP m64, r64, r64 [BMI2] +// +func (self *Program) PDEP(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PDEP", 3, Operands { v0, v1, v2 }) + // PDEP r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7b ^ (hlcode(v[1]) << 3)) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // PDEP m32, r32, r32 + if isM32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x03, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // PDEP r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfb ^ (hlcode(v[1]) << 3)) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // PDEP m64, r64, r64 + if isM64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x83, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PDEP") + } + return p +} + +// PEXT performs "Parallel Bits Extract". +// +// Mnemonic : PEXT +// Supported forms : (4 forms) +// +// * PEXT r32, r32, r32 [BMI2] +// * PEXT m32, r32, r32 [BMI2] +// * PEXT r64, r64, r64 [BMI2] +// * PEXT m64, r64, r64 [BMI2] +// +func (self *Program) PEXT(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PEXT", 3, Operands { v0, v1, v2 }) + // PEXT r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7a ^ (hlcode(v[1]) << 3)) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // PEXT m32, r32, r32 + if isM32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x02, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // PEXT r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfa ^ (hlcode(v[1]) << 3)) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // PEXT m64, r64, r64 + if isM64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x82, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PEXT") + } + return p +} + +// PEXTRB performs "Extract Byte". 
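+//
+// Usage sketch (illustrative; operand order follows the forms below):
+//
+//     p.PEXTRB(3, XMM0, EAX) // EAX = zero-extended byte 3 of XMM0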
+// +// Mnemonic : PEXTRB +// Supported forms : (2 forms) +// +// * PEXTRB imm8, xmm, r32 [SSE4.1] +// * PEXTRB imm8, xmm, m8 [SSE4.1] +// +func (self *Program) PEXTRB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PEXTRB", 3, Operands { v0, v1, v2 }) + // PEXTRB imm8, xmm, r32 + if isImm8(v0) && isXMM(v1) && isReg32(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x14) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // PEXTRB imm8, xmm, m8 + if isImm8(v0) && isXMM(v1) && isM8(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x14) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PEXTRB") + } + return p +} + +// PEXTRD performs "Extract Doubleword". +// +// Mnemonic : PEXTRD +// Supported forms : (2 forms) +// +// * PEXTRD imm8, xmm, r32 [SSE4.1] +// * PEXTRD imm8, xmm, m32 [SSE4.1] +// +func (self *Program) PEXTRD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PEXTRD", 3, Operands { v0, v1, v2 }) + // PEXTRD imm8, xmm, r32 + if isImm8(v0) && isXMM(v1) && isReg32(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // PEXTRD imm8, xmm, m32 + if isImm8(v0) && isXMM(v1) && isM32(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PEXTRD") + } + return p +} + +// PEXTRQ performs "Extract Quadword". +// +// Mnemonic : PEXTRQ +// Supported forms : (2 forms) +// +// * PEXTRQ imm8, xmm, r64 [SSE4.1] +// * PEXTRQ imm8, xmm, m64 [SSE4.1] +// +func (self *Program) PEXTRQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PEXTRQ", 3, Operands { v0, v1, v2 }) + // PEXTRQ imm8, xmm, r64 + if isImm8(v0) && isXMM(v1) && isReg64(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2])) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // PEXTRQ imm8, xmm, m64 + if isImm8(v0) && isXMM(v1) && isM64(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexm(1, hcode(v[1]), addr(v[2])) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PEXTRQ") + } + return p +} + +// PEXTRW performs "Extract Word". 
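+//
+// Usage sketch (illustrative; operand order follows the forms below):
+//
+//     p.PEXTRW(2, XMM0, EAX) // EAX = zero-extended word 2 of XMM0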
+//
+// Mnemonic : PEXTRW
+// Supported forms : (3 forms)
+//
+// * PEXTRW imm8, mm, r32 [MMX+]
+// * PEXTRW imm8, xmm, r32 [SSE4.1]
+// * PEXTRW imm8, xmm, m16 [SSE4.1]
+//
+func (self *Program) PEXTRW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+    p := self.alloc("PEXTRW", 3, Operands { v0, v1, v2 })
+    // PEXTRW imm8, mm, r32
+    if isImm8(v0) && isMM(v1) && isReg32(v2) {
+        self.require(ISA_MMX_PLUS)
+        p.domain = DomainMMXSSE
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.rexo(hcode(v[2]), v[1], false)
+            m.emit(0x0f)
+            m.emit(0xc5)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // PEXTRW imm8, xmm, r32
+    if isImm8(v0) && isXMM(v1) && isReg32(v2) {
+        self.require(ISA_SSE4_1)
+        p.domain = DomainMMXSSE
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x66)
+            m.rexo(hcode(v[1]), v[2], false)
+            m.emit(0x0f)
+            m.emit(0x3a)
+            m.emit(0x15)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+            m.imm1(toImmAny(v[0]))
+        })
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x66)
+            m.rexo(hcode(v[2]), v[1], false)
+            m.emit(0x0f)
+            m.emit(0xc5)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // PEXTRW imm8, xmm, m16
+    if isImm8(v0) && isXMM(v1) && isM16(v2) {
+        self.require(ISA_SSE4_1)
+        p.domain = DomainMMXSSE
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x66)
+            m.rexo(hcode(v[1]), addr(v[2]), false)
+            m.emit(0x0f)
+            m.emit(0x3a)
+            m.emit(0x15)
+            m.mrsd(lcode(v[1]), addr(v[2]), 1)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for PEXTRW")
+    }
+    return p
+}
+
+// PF2ID performs "Packed Floating-Point to Integer Doubleword Conversion".
+//
+// Mnemonic : PF2ID
+// Supported forms : (2 forms)
+//
+// * PF2ID mm, mm [3dnow!]
+// * PF2ID m64, mm [3dnow!]
+//
+func (self *Program) PF2ID(v0 interface{}, v1 interface{}) *Instruction {
+    p := self.alloc("PF2ID", 2, Operands { v0, v1 })
+    // PF2ID mm, mm
+    if isMM(v0) && isMM(v1) {
+        self.require(ISA_3DNOW)
+        p.domain = DomainAMDSpecific
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.rexo(hcode(v[1]), v[0], false)
+            m.emit(0x0f)
+            m.emit(0x0f)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+            m.emit(0x1d)
+        })
+    }
+    // PF2ID m64, mm
+    if isM64(v0) && isMM(v1) {
+        self.require(ISA_3DNOW)
+        p.domain = DomainAMDSpecific
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.rexo(hcode(v[1]), addr(v[0]), false)
+            m.emit(0x0f)
+            m.emit(0x0f)
+            m.mrsd(lcode(v[1]), addr(v[0]), 1)
+            m.emit(0x1d)
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for PF2ID")
+    }
+    return p
+}
+
+// PF2IW performs "Packed Floating-Point to Integer Word Conversion".
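+//
+// Usage sketch (illustrative; the same two-operand calling pattern applies to
+// the other 3DNow! encoders in this file):
+//
+//     p.PF2IW(MM1, MM0) // MM0 = the two floats of MM1 truncated to word
+//                       // integers, sign-extended into each doubleword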
+// +// Mnemonic : PF2IW +// Supported forms : (2 forms) +// +// * PF2IW mm, mm [3dnow!+] +// * PF2IW m64, mm [3dnow!+] +// +func (self *Program) PF2IW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PF2IW", 2, Operands { v0, v1 }) + // PF2IW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x1c) + }) + } + // PF2IW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x1c) + }) + } + if p.len == 0 { + panic("invalid operands for PF2IW") + } + return p +} + +// PFACC performs "Packed Floating-Point Accumulate". +// +// Mnemonic : PFACC +// Supported forms : (2 forms) +// +// * PFACC mm, mm [3dnow!] +// * PFACC m64, mm [3dnow!] +// +func (self *Program) PFACC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFACC", 2, Operands { v0, v1 }) + // PFACC mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xae) + }) + } + // PFACC m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xae) + }) + } + if p.len == 0 { + panic("invalid operands for PFACC") + } + return p +} + +// PFADD performs "Packed Floating-Point Add". +// +// Mnemonic : PFADD +// Supported forms : (2 forms) +// +// * PFADD mm, mm [3dnow!] +// * PFADD m64, mm [3dnow!] +// +func (self *Program) PFADD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFADD", 2, Operands { v0, v1 }) + // PFADD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x9e) + }) + } + // PFADD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x9e) + }) + } + if p.len == 0 { + panic("invalid operands for PFADD") + } + return p +} + +// PFCMPEQ performs "Packed Floating-Point Compare for Equal". +// +// Mnemonic : PFCMPEQ +// Supported forms : (2 forms) +// +// * PFCMPEQ mm, mm [3dnow!] +// * PFCMPEQ m64, mm [3dnow!] 
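+//
+// Usage sketch (illustrative): p.PFCMPEQ(MM1, MM0) sets each doubleword lane
+// of MM0 to all ones (0xFFFFFFFF) where it equals the corresponding lane of
+// MM1, and to zero elsewhere; PFCMPGE and PFCMPGT below follow the same
+// mask convention.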
+// +func (self *Program) PFCMPEQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFCMPEQ", 2, Operands { v0, v1 }) + // PFCMPEQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xb0) + }) + } + // PFCMPEQ m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xb0) + }) + } + if p.len == 0 { + panic("invalid operands for PFCMPEQ") + } + return p +} + +// PFCMPGE performs "Packed Floating-Point Compare for Greater or Equal". +// +// Mnemonic : PFCMPGE +// Supported forms : (2 forms) +// +// * PFCMPGE mm, mm [3dnow!] +// * PFCMPGE m64, mm [3dnow!] +// +func (self *Program) PFCMPGE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFCMPGE", 2, Operands { v0, v1 }) + // PFCMPGE mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x90) + }) + } + // PFCMPGE m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x90) + }) + } + if p.len == 0 { + panic("invalid operands for PFCMPGE") + } + return p +} + +// PFCMPGT performs "Packed Floating-Point Compare for Greater Than". +// +// Mnemonic : PFCMPGT +// Supported forms : (2 forms) +// +// * PFCMPGT mm, mm [3dnow!] +// * PFCMPGT m64, mm [3dnow!] +// +func (self *Program) PFCMPGT(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFCMPGT", 2, Operands { v0, v1 }) + // PFCMPGT mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xa0) + }) + } + // PFCMPGT m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xa0) + }) + } + if p.len == 0 { + panic("invalid operands for PFCMPGT") + } + return p +} + +// PFMAX performs "Packed Floating-Point Maximum". +// +// Mnemonic : PFMAX +// Supported forms : (2 forms) +// +// * PFMAX mm, mm [3dnow!] +// * PFMAX m64, mm [3dnow!] 
+// +func (self *Program) PFMAX(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFMAX", 2, Operands { v0, v1 }) + // PFMAX mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xa4) + }) + } + // PFMAX m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xa4) + }) + } + if p.len == 0 { + panic("invalid operands for PFMAX") + } + return p +} + +// PFMIN performs "Packed Floating-Point Minimum". +// +// Mnemonic : PFMIN +// Supported forms : (2 forms) +// +// * PFMIN mm, mm [3dnow!] +// * PFMIN m64, mm [3dnow!] +// +func (self *Program) PFMIN(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFMIN", 2, Operands { v0, v1 }) + // PFMIN mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x94) + }) + } + // PFMIN m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x94) + }) + } + if p.len == 0 { + panic("invalid operands for PFMIN") + } + return p +} + +// PFMUL performs "Packed Floating-Point Multiply". +// +// Mnemonic : PFMUL +// Supported forms : (2 forms) +// +// * PFMUL mm, mm [3dnow!] +// * PFMUL m64, mm [3dnow!] +// +func (self *Program) PFMUL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFMUL", 2, Operands { v0, v1 }) + // PFMUL mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xb4) + }) + } + // PFMUL m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xb4) + }) + } + if p.len == 0 { + panic("invalid operands for PFMUL") + } + return p +} + +// PFNACC performs "Packed Floating-Point Negative Accumulate". 
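+//
+// Semantics sketch (illustrative): after p.PFNACC(MM1, MM0), MM0 holds
+// { x0 - x1, y0 - y1 } over the packed floats, where x is the old MM0 and
+// y is MM1.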
+// +// Mnemonic : PFNACC +// Supported forms : (2 forms) +// +// * PFNACC mm, mm [3dnow!+] +// * PFNACC m64, mm [3dnow!+] +// +func (self *Program) PFNACC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFNACC", 2, Operands { v0, v1 }) + // PFNACC mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x8a) + }) + } + // PFNACC m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x8a) + }) + } + if p.len == 0 { + panic("invalid operands for PFNACC") + } + return p +} + +// PFPNACC performs "Packed Floating-Point Positive-Negative Accumulate". +// +// Mnemonic : PFPNACC +// Supported forms : (2 forms) +// +// * PFPNACC mm, mm [3dnow!+] +// * PFPNACC m64, mm [3dnow!+] +// +func (self *Program) PFPNACC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFPNACC", 2, Operands { v0, v1 }) + // PFPNACC mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x8e) + }) + } + // PFPNACC m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x8e) + }) + } + if p.len == 0 { + panic("invalid operands for PFPNACC") + } + return p +} + +// PFRCP performs "Packed Floating-Point Reciprocal Approximation". +// +// Mnemonic : PFRCP +// Supported forms : (2 forms) +// +// * PFRCP mm, mm [3dnow!] +// * PFRCP m64, mm [3dnow!] +// +func (self *Program) PFRCP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFRCP", 2, Operands { v0, v1 }) + // PFRCP mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x96) + }) + } + // PFRCP m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x96) + }) + } + if p.len == 0 { + panic("invalid operands for PFRCP") + } + return p +} + +// PFRCPIT1 performs "Packed Floating-Point Reciprocal Iteration 1". +// +// Mnemonic : PFRCPIT1 +// Supported forms : (2 forms) +// +// * PFRCPIT1 mm, mm [3dnow!] +// * PFRCPIT1 m64, mm [3dnow!] 
+// +func (self *Program) PFRCPIT1(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFRCPIT1", 2, Operands { v0, v1 }) + // PFRCPIT1 mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xa6) + }) + } + // PFRCPIT1 m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xa6) + }) + } + if p.len == 0 { + panic("invalid operands for PFRCPIT1") + } + return p +} + +// PFRCPIT2 performs "Packed Floating-Point Reciprocal Iteration 2". +// +// Mnemonic : PFRCPIT2 +// Supported forms : (2 forms) +// +// * PFRCPIT2 mm, mm [3dnow!] +// * PFRCPIT2 m64, mm [3dnow!] +// +func (self *Program) PFRCPIT2(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFRCPIT2", 2, Operands { v0, v1 }) + // PFRCPIT2 mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xb6) + }) + } + // PFRCPIT2 m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xb6) + }) + } + if p.len == 0 { + panic("invalid operands for PFRCPIT2") + } + return p +} + +// PFRSQIT1 performs "Packed Floating-Point Reciprocal Square Root Iteration 1". +// +// Mnemonic : PFRSQIT1 +// Supported forms : (2 forms) +// +// * PFRSQIT1 mm, mm [3dnow!] +// * PFRSQIT1 m64, mm [3dnow!] +// +func (self *Program) PFRSQIT1(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFRSQIT1", 2, Operands { v0, v1 }) + // PFRSQIT1 mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xa7) + }) + } + // PFRSQIT1 m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xa7) + }) + } + if p.len == 0 { + panic("invalid operands for PFRSQIT1") + } + return p +} + +// PFRSQRT performs "Packed Floating-Point Reciprocal Square Root Approximation". +// +// Mnemonic : PFRSQRT +// Supported forms : (2 forms) +// +// * PFRSQRT mm, mm [3dnow!] +// * PFRSQRT m64, mm [3dnow!] 
+// +func (self *Program) PFRSQRT(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFRSQRT", 2, Operands { v0, v1 }) + // PFRSQRT mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x97) + }) + } + // PFRSQRT m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x97) + }) + } + if p.len == 0 { + panic("invalid operands for PFRSQRT") + } + return p +} + +// PFSUB performs "Packed Floating-Point Subtract". +// +// Mnemonic : PFSUB +// Supported forms : (2 forms) +// +// * PFSUB mm, mm [3dnow!] +// * PFSUB m64, mm [3dnow!] +// +func (self *Program) PFSUB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFSUB", 2, Operands { v0, v1 }) + // PFSUB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x9a) + }) + } + // PFSUB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x9a) + }) + } + if p.len == 0 { + panic("invalid operands for PFSUB") + } + return p +} + +// PFSUBR performs "Packed Floating-Point Subtract Reverse". +// +// Mnemonic : PFSUBR +// Supported forms : (2 forms) +// +// * PFSUBR mm, mm [3dnow!] +// * PFSUBR m64, mm [3dnow!] +// +func (self *Program) PFSUBR(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFSUBR", 2, Operands { v0, v1 }) + // PFSUBR mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xaa) + }) + } + // PFSUBR m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xaa) + }) + } + if p.len == 0 { + panic("invalid operands for PFSUBR") + } + return p +} + +// PHADDD performs "Packed Horizontal Add Doubleword Integer". 
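+//
+// Semantics sketch (illustrative): after p.PHADDD(XMM1, XMM0) the four int32
+// lanes of XMM0 are { x0+x1, x2+x3, y0+y1, y2+y3 }, where x is the old XMM0
+// and y is XMM1.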
+// +// Mnemonic : PHADDD +// Supported forms : (4 forms) +// +// * PHADDD mm, mm [SSSE3] +// * PHADDD m64, mm [SSSE3] +// * PHADDD xmm, xmm [SSSE3] +// * PHADDD m128, xmm [SSSE3] +// +func (self *Program) PHADDD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PHADDD", 2, Operands { v0, v1 }) + // PHADDD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x02) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHADDD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x02) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PHADDD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x02) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHADDD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x02) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PHADDD") + } + return p +} + +// PHADDSW performs "Packed Horizontal Add Signed Word Integers with Signed Saturation". +// +// Mnemonic : PHADDSW +// Supported forms : (4 forms) +// +// * PHADDSW mm, mm [SSSE3] +// * PHADDSW m64, mm [SSSE3] +// * PHADDSW xmm, xmm [SSSE3] +// * PHADDSW m128, xmm [SSSE3] +// +func (self *Program) PHADDSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PHADDSW", 2, Operands { v0, v1 }) + // PHADDSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x03) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHADDSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x03) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PHADDSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x03) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHADDSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x03) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PHADDSW") + } + return p +} + +// PHADDW performs "Packed Horizontal Add Word Integers". 
+// +// Mnemonic : PHADDW +// Supported forms : (4 forms) +// +// * PHADDW mm, mm [SSSE3] +// * PHADDW m64, mm [SSSE3] +// * PHADDW xmm, xmm [SSSE3] +// * PHADDW m128, xmm [SSSE3] +// +func (self *Program) PHADDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PHADDW", 2, Operands { v0, v1 }) + // PHADDW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x01) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHADDW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x01) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PHADDW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x01) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHADDW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x01) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PHADDW") + } + return p +} + +// PHMINPOSUW performs "Packed Horizontal Minimum of Unsigned Word Integers". +// +// Mnemonic : PHMINPOSUW +// Supported forms : (2 forms) +// +// * PHMINPOSUW xmm, xmm [SSE4.1] +// * PHMINPOSUW m128, xmm [SSE4.1] +// +func (self *Program) PHMINPOSUW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PHMINPOSUW", 2, Operands { v0, v1 }) + // PHMINPOSUW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x41) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHMINPOSUW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x41) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PHMINPOSUW") + } + return p +} + +// PHSUBD performs "Packed Horizontal Subtract Doubleword Integers". 
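+//
+// Semantics sketch (illustrative): p.PHSUBD(XMM1, XMM0) sets the int32 lanes
+// of XMM0 to { x0-x1, x2-x3, y0-y1, y2-y3 } (x = old XMM0, y = XMM1).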
+// +// Mnemonic : PHSUBD +// Supported forms : (4 forms) +// +// * PHSUBD mm, mm [SSSE3] +// * PHSUBD m64, mm [SSSE3] +// * PHSUBD xmm, xmm [SSSE3] +// * PHSUBD m128, xmm [SSSE3] +// +func (self *Program) PHSUBD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PHSUBD", 2, Operands { v0, v1 }) + // PHSUBD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x06) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHSUBD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x06) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PHSUBD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x06) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHSUBD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x06) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PHSUBD") + } + return p +} + +// PHSUBSW performs "Packed Horizontal Subtract Signed Word Integers with Signed Saturation". +// +// Mnemonic : PHSUBSW +// Supported forms : (4 forms) +// +// * PHSUBSW mm, mm [SSSE3] +// * PHSUBSW m64, mm [SSSE3] +// * PHSUBSW xmm, xmm [SSSE3] +// * PHSUBSW m128, xmm [SSSE3] +// +func (self *Program) PHSUBSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PHSUBSW", 2, Operands { v0, v1 }) + // PHSUBSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x07) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHSUBSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x07) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PHSUBSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x07) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHSUBSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x07) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PHSUBSW") + } + return p +} + +// PHSUBW performs "Packed Horizontal Subtract Word Integers". 
+// +// Mnemonic : PHSUBW +// Supported forms : (4 forms) +// +// * PHSUBW mm, mm [SSSE3] +// * PHSUBW m64, mm [SSSE3] +// * PHSUBW xmm, xmm [SSSE3] +// * PHSUBW m128, xmm [SSSE3] +// +func (self *Program) PHSUBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PHSUBW", 2, Operands { v0, v1 }) + // PHSUBW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x05) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHSUBW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x05) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PHSUBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x05) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHSUBW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x05) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PHSUBW") + } + return p +} + +// PI2FD performs "Packed Integer to Floating-Point Doubleword Conversion". +// +// Mnemonic : PI2FD +// Supported forms : (2 forms) +// +// * PI2FD mm, mm [3dnow!] +// * PI2FD m64, mm [3dnow!] +// +func (self *Program) PI2FD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PI2FD", 2, Operands { v0, v1 }) + // PI2FD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x0d) + }) + } + // PI2FD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x0d) + }) + } + if p.len == 0 { + panic("invalid operands for PI2FD") + } + return p +} + +// PI2FW performs "Packed Integer to Floating-Point Word Conversion". +// +// Mnemonic : PI2FW +// Supported forms : (2 forms) +// +// * PI2FW mm, mm [3dnow!+] +// * PI2FW m64, mm [3dnow!+] +// +func (self *Program) PI2FW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PI2FW", 2, Operands { v0, v1 }) + // PI2FW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x0c) + }) + } + // PI2FW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x0c) + }) + } + if p.len == 0 { + panic("invalid operands for PI2FW") + } + return p +} + +// PINSRB performs "Insert Byte". 
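+//
+// Usage sketch (illustrative; operand order follows the forms below):
+//
+//     p.PINSRB(5, EAX, XMM0) // byte 5 of XMM0 = low byte of EAX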
+// +// Mnemonic : PINSRB +// Supported forms : (2 forms) +// +// * PINSRB imm8, r32, xmm [SSE4.1] +// * PINSRB imm8, m8, xmm [SSE4.1] +// +func (self *Program) PINSRB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PINSRB", 3, Operands { v0, v1, v2 }) + // PINSRB imm8, r32, xmm + if isImm8(v0) && isReg32(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x20) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PINSRB imm8, m8, xmm + if isImm8(v0) && isM8(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x20) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PINSRB") + } + return p +} + +// PINSRD performs "Insert Doubleword". +// +// Mnemonic : PINSRD +// Supported forms : (2 forms) +// +// * PINSRD imm8, r32, xmm [SSE4.1] +// * PINSRD imm8, m32, xmm [SSE4.1] +// +func (self *Program) PINSRD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PINSRD", 3, Operands { v0, v1, v2 }) + // PINSRD imm8, r32, xmm + if isImm8(v0) && isReg32(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x22) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PINSRD imm8, m32, xmm + if isImm8(v0) && isM32(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x22) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PINSRD") + } + return p +} + +// PINSRQ performs "Insert Quadword". +// +// Mnemonic : PINSRQ +// Supported forms : (2 forms) +// +// * PINSRQ imm8, r64, xmm [SSE4.1] +// * PINSRQ imm8, m64, xmm [SSE4.1] +// +func (self *Program) PINSRQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PINSRQ", 3, Operands { v0, v1, v2 }) + // PINSRQ imm8, r64, xmm + if isImm8(v0) && isReg64(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x48 | hcode(v[2]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x22) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PINSRQ imm8, m64, xmm + if isImm8(v0) && isM64(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexm(1, hcode(v[2]), addr(v[1])) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x22) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PINSRQ") + } + return p +} + +// PINSRW performs "Insert Word". 
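+//
+// Usage sketch (illustrative):
+//
+//     p.PINSRW(0, EAX, XMM0) // word 0 of XMM0 = low word of EAX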
+// +// Mnemonic : PINSRW +// Supported forms : (4 forms) +// +// * PINSRW imm8, r32, mm [MMX+] +// * PINSRW imm8, m16, mm [MMX+] +// * PINSRW imm8, r32, xmm [SSE2] +// * PINSRW imm8, m16, xmm [SSE2] +// +func (self *Program) PINSRW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PINSRW", 3, Operands { v0, v1, v2 }) + // PINSRW imm8, r32, mm + if isImm8(v0) && isReg32(v1) && isMM(v2) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PINSRW imm8, m16, mm + if isImm8(v0) && isM16(v1) && isMM(v2) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc4) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // PINSRW imm8, r32, xmm + if isImm8(v0) && isReg32(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PINSRW imm8, m16, xmm + if isImm8(v0) && isM16(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc4) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PINSRW") + } + return p +} + +// PMADDUBSW performs "Multiply and Add Packed Signed and Unsigned Byte Integers". +// +// Mnemonic : PMADDUBSW +// Supported forms : (4 forms) +// +// * PMADDUBSW mm, mm [SSSE3] +// * PMADDUBSW m64, mm [SSSE3] +// * PMADDUBSW xmm, xmm [SSSE3] +// * PMADDUBSW m128, xmm [SSSE3] +// +func (self *Program) PMADDUBSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMADDUBSW", 2, Operands { v0, v1 }) + // PMADDUBSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x04) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMADDUBSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x04) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMADDUBSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x04) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMADDUBSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x04) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMADDUBSW") + } + return p +} + +// PMADDWD performs "Multiply and Add Packed Signed Word Integers". 
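+//
+// Semantics sketch (illustrative): with p.PMADDWD(XMM1, XMM0), each int32
+// lane i of XMM0 becomes x[2i]*y[2i] + x[2i+1]*y[2i+1], computed on signed
+// 16-bit words with 32-bit intermediate products (x = old XMM0, y = XMM1).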
+// +// Mnemonic : PMADDWD +// Supported forms : (4 forms) +// +// * PMADDWD mm, mm [MMX] +// * PMADDWD m64, mm [MMX] +// * PMADDWD xmm, xmm [SSE2] +// * PMADDWD m128, xmm [SSE2] +// +func (self *Program) PMADDWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMADDWD", 2, Operands { v0, v1 }) + // PMADDWD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMADDWD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf5) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMADDWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMADDWD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf5) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMADDWD") + } + return p +} + +// PMAXSB performs "Maximum of Packed Signed Byte Integers". +// +// Mnemonic : PMAXSB +// Supported forms : (2 forms) +// +// * PMAXSB xmm, xmm [SSE4.1] +// * PMAXSB m128, xmm [SSE4.1] +// +func (self *Program) PMAXSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMAXSB", 2, Operands { v0, v1 }) + // PMAXSB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMAXSB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMAXSB") + } + return p +} + +// PMAXSD performs "Maximum of Packed Signed Doubleword Integers". 
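+//
+// Usage sketch (illustrative):
+//
+//     p.PMAXSD(XMM1, XMM0) // XMM0[i] = max(XMM0[i], XMM1[i]) as int32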
+// +// Mnemonic : PMAXSD +// Supported forms : (2 forms) +// +// * PMAXSD xmm, xmm [SSE4.1] +// * PMAXSD m128, xmm [SSE4.1] +// +func (self *Program) PMAXSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMAXSD", 2, Operands { v0, v1 }) + // PMAXSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMAXSD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMAXSD") + } + return p +} + +// PMAXSW performs "Maximum of Packed Signed Word Integers". +// +// Mnemonic : PMAXSW +// Supported forms : (4 forms) +// +// * PMAXSW mm, mm [MMX+] +// * PMAXSW m64, mm [MMX+] +// * PMAXSW xmm, xmm [SSE2] +// * PMAXSW m128, xmm [SSE2] +// +func (self *Program) PMAXSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMAXSW", 2, Operands { v0, v1 }) + // PMAXSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xee) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMAXSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xee) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMAXSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xee) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMAXSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xee) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMAXSW") + } + return p +} + +// PMAXUB performs "Maximum of Packed Unsigned Byte Integers". 
+// +// Mnemonic : PMAXUB +// Supported forms : (4 forms) +// +// * PMAXUB mm, mm [MMX+] +// * PMAXUB m64, mm [MMX+] +// * PMAXUB xmm, xmm [SSE2] +// * PMAXUB m128, xmm [SSE2] +// +func (self *Program) PMAXUB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMAXUB", 2, Operands { v0, v1 }) + // PMAXUB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xde) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMAXUB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xde) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMAXUB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xde) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMAXUB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xde) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMAXUB") + } + return p +} + +// PMAXUD performs "Maximum of Packed Unsigned Doubleword Integers". +// +// Mnemonic : PMAXUD +// Supported forms : (2 forms) +// +// * PMAXUD xmm, xmm [SSE4.1] +// * PMAXUD m128, xmm [SSE4.1] +// +func (self *Program) PMAXUD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMAXUD", 2, Operands { v0, v1 }) + // PMAXUD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMAXUD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMAXUD") + } + return p +} + +// PMAXUW performs "Maximum of Packed Unsigned Word Integers". 
+// +// Mnemonic : PMAXUW +// Supported forms : (2 forms) +// +// * PMAXUW xmm, xmm [SSE4.1] +// * PMAXUW m128, xmm [SSE4.1] +// +func (self *Program) PMAXUW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMAXUW", 2, Operands { v0, v1 }) + // PMAXUW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMAXUW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMAXUW") + } + return p +} + +// PMINSB performs "Minimum of Packed Signed Byte Integers". +// +// Mnemonic : PMINSB +// Supported forms : (2 forms) +// +// * PMINSB xmm, xmm [SSE4.1] +// * PMINSB m128, xmm [SSE4.1] +// +func (self *Program) PMINSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMINSB", 2, Operands { v0, v1 }) + // PMINSB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x38) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMINSB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x38) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMINSB") + } + return p +} + +// PMINSD performs "Minimum of Packed Signed Doubleword Integers". +// +// Mnemonic : PMINSD +// Supported forms : (2 forms) +// +// * PMINSD xmm, xmm [SSE4.1] +// * PMINSD m128, xmm [SSE4.1] +// +func (self *Program) PMINSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMINSD", 2, Operands { v0, v1 }) + // PMINSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMINSD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x39) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMINSD") + } + return p +} + +// PMINSW performs "Minimum of Packed Signed Word Integers". 
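+//
+// Usage sketch (illustrative):
+//
+//     p.PMINSW(MM1, MM0) // MM0[i] = min(MM0[i], MM1[i]) as int16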
+// +// Mnemonic : PMINSW +// Supported forms : (4 forms) +// +// * PMINSW mm, mm [MMX+] +// * PMINSW m64, mm [MMX+] +// * PMINSW xmm, xmm [SSE2] +// * PMINSW m128, xmm [SSE2] +// +func (self *Program) PMINSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMINSW", 2, Operands { v0, v1 }) + // PMINSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xea) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMINSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xea) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMINSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xea) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMINSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xea) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMINSW") + } + return p +} + +// PMINUB performs "Minimum of Packed Unsigned Byte Integers". +// +// Mnemonic : PMINUB +// Supported forms : (4 forms) +// +// * PMINUB mm, mm [MMX+] +// * PMINUB m64, mm [MMX+] +// * PMINUB xmm, xmm [SSE2] +// * PMINUB m128, xmm [SSE2] +// +func (self *Program) PMINUB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMINUB", 2, Operands { v0, v1 }) + // PMINUB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xda) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMINUB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xda) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMINUB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xda) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMINUB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xda) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMINUB") + } + return p +} + +// PMINUD performs "Minimum of Packed Unsigned Doubleword Integers". 
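+//
+// Note the SSE4.1 signed/unsigned split: the doubleword minimum forms
+// differ only in the third opcode byte (0x39 for PMINSD above, 0x3b
+// here); the rest of the encoding is identical.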
+// +// Mnemonic : PMINUD +// Supported forms : (2 forms) +// +// * PMINUD xmm, xmm [SSE4.1] +// * PMINUD m128, xmm [SSE4.1] +// +func (self *Program) PMINUD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMINUD", 2, Operands { v0, v1 }) + // PMINUD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMINUD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMINUD") + } + return p +} + +// PMINUW performs "Minimum of Packed Unsigned Word Integers". +// +// Mnemonic : PMINUW +// Supported forms : (2 forms) +// +// * PMINUW xmm, xmm [SSE4.1] +// * PMINUW m128, xmm [SSE4.1] +// +func (self *Program) PMINUW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMINUW", 2, Operands { v0, v1 }) + // PMINUW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMINUW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMINUW") + } + return p +} + +// PMOVMSKB performs "Move Byte Mask". +// +// Mnemonic : PMOVMSKB +// Supported forms : (2 forms) +// +// * PMOVMSKB mm, r32 [MMX+] +// * PMOVMSKB xmm, r32 [SSE2] +// +func (self *Program) PMOVMSKB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVMSKB", 2, Operands { v0, v1 }) + // PMOVMSKB mm, r32 + if isMM(v0) && isReg32(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVMSKB xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVMSKB") + } + return p +} + +// PMOVSXBD performs "Move Packed Byte Integers to Doubleword Integers with Sign Extension". 
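+//
+// Sign-extends the four low source bytes into four doublewords, which is
+// why the memory form takes m32 rather than m128: only 4 x 1 byte of
+// memory is actually read.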
+// +// Mnemonic : PMOVSXBD +// Supported forms : (2 forms) +// +// * PMOVSXBD xmm, xmm [SSE4.1] +// * PMOVSXBD m32, xmm [SSE4.1] +// +func (self *Program) PMOVSXBD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVSXBD", 2, Operands { v0, v1 }) + // PMOVSXBD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x21) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVSXBD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x21) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVSXBD") + } + return p +} + +// PMOVSXBQ performs "Move Packed Byte Integers to Quadword Integers with Sign Extension". +// +// Mnemonic : PMOVSXBQ +// Supported forms : (2 forms) +// +// * PMOVSXBQ xmm, xmm [SSE4.1] +// * PMOVSXBQ m16, xmm [SSE4.1] +// +func (self *Program) PMOVSXBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVSXBQ", 2, Operands { v0, v1 }) + // PMOVSXBQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x22) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVSXBQ m16, xmm + if isM16(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x22) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVSXBQ") + } + return p +} + +// PMOVSXBW performs "Move Packed Byte Integers to Word Integers with Sign Extension". +// +// Mnemonic : PMOVSXBW +// Supported forms : (2 forms) +// +// * PMOVSXBW xmm, xmm [SSE4.1] +// * PMOVSXBW m64, xmm [SSE4.1] +// +func (self *Program) PMOVSXBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVSXBW", 2, Operands { v0, v1 }) + // PMOVSXBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x20) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVSXBW m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x20) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVSXBW") + } + return p +} + +// PMOVSXDQ performs "Move Packed Doubleword Integers to Quadword Integers with Sign Extension". 
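+//
+// The PMOVSX group occupies consecutive opcodes in the 0f 38 map: 0x20
+// (BW), 0x21 (BD), 0x22 (BQ), 0x23 (WD), 0x24 (WQ) and 0x25 (DQ, this
+// instruction).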
+// +// Mnemonic : PMOVSXDQ +// Supported forms : (2 forms) +// +// * PMOVSXDQ xmm, xmm [SSE4.1] +// * PMOVSXDQ m64, xmm [SSE4.1] +// +func (self *Program) PMOVSXDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVSXDQ", 2, Operands { v0, v1 }) + // PMOVSXDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x25) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVSXDQ m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x25) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVSXDQ") + } + return p +} + +// PMOVSXWD performs "Move Packed Word Integers to Doubleword Integers with Sign Extension". +// +// Mnemonic : PMOVSXWD +// Supported forms : (2 forms) +// +// * PMOVSXWD xmm, xmm [SSE4.1] +// * PMOVSXWD m64, xmm [SSE4.1] +// +func (self *Program) PMOVSXWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVSXWD", 2, Operands { v0, v1 }) + // PMOVSXWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVSXWD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVSXWD") + } + return p +} + +// PMOVSXWQ performs "Move Packed Word Integers to Quadword Integers with Sign Extension". +// +// Mnemonic : PMOVSXWQ +// Supported forms : (2 forms) +// +// * PMOVSXWQ xmm, xmm [SSE4.1] +// * PMOVSXWQ m32, xmm [SSE4.1] +// +func (self *Program) PMOVSXWQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVSXWQ", 2, Operands { v0, v1 }) + // PMOVSXWQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x24) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVSXWQ m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x24) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVSXWQ") + } + return p +} + +// PMOVZXBD performs "Move Packed Byte Integers to Doubleword Integers with Zero Extension". 
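+//
+// Zero-extending counterpart of PMOVSXBD. The PMOVZX opcodes are the
+// PMOVSX ones plus 0x10 (0x30..0x35 versus 0x20..0x25), with identical
+// operand widths.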
+// +// Mnemonic : PMOVZXBD +// Supported forms : (2 forms) +// +// * PMOVZXBD xmm, xmm [SSE4.1] +// * PMOVZXBD m32, xmm [SSE4.1] +// +func (self *Program) PMOVZXBD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVZXBD", 2, Operands { v0, v1 }) + // PMOVZXBD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x31) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVZXBD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x31) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVZXBD") + } + return p +} + +// PMOVZXBQ performs "Move Packed Byte Integers to Quadword Integers with Zero Extension". +// +// Mnemonic : PMOVZXBQ +// Supported forms : (2 forms) +// +// * PMOVZXBQ xmm, xmm [SSE4.1] +// * PMOVZXBQ m16, xmm [SSE4.1] +// +func (self *Program) PMOVZXBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVZXBQ", 2, Operands { v0, v1 }) + // PMOVZXBQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x32) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVZXBQ m16, xmm + if isM16(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x32) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVZXBQ") + } + return p +} + +// PMOVZXBW performs "Move Packed Byte Integers to Word Integers with Zero Extension". +// +// Mnemonic : PMOVZXBW +// Supported forms : (2 forms) +// +// * PMOVZXBW xmm, xmm [SSE4.1] +// * PMOVZXBW m64, xmm [SSE4.1] +// +func (self *Program) PMOVZXBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVZXBW", 2, Operands { v0, v1 }) + // PMOVZXBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x30) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVZXBW m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x30) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVZXBW") + } + return p +} + +// PMOVZXDQ performs "Move Packed Doubleword Integers to Quadword Integers with Zero Extension". 
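+//
+// Worked encoding (editorial sketch, XMM constants assumed): for
+// p.PMOVZXDQ(XMM1, XMM0) the ModRM byte is 0xc0 | 0<<3 | 1 = 0xc1, so
+// the register form below emits: 66 0f 38 35 c1.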
+// +// Mnemonic : PMOVZXDQ +// Supported forms : (2 forms) +// +// * PMOVZXDQ xmm, xmm [SSE4.1] +// * PMOVZXDQ m64, xmm [SSE4.1] +// +func (self *Program) PMOVZXDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVZXDQ", 2, Operands { v0, v1 }) + // PMOVZXDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x35) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVZXDQ m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x35) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVZXDQ") + } + return p +} + +// PMOVZXWD performs "Move Packed Word Integers to Doubleword Integers with Zero Extension". +// +// Mnemonic : PMOVZXWD +// Supported forms : (2 forms) +// +// * PMOVZXWD xmm, xmm [SSE4.1] +// * PMOVZXWD m64, xmm [SSE4.1] +// +func (self *Program) PMOVZXWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVZXWD", 2, Operands { v0, v1 }) + // PMOVZXWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVZXWD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVZXWD") + } + return p +} + +// PMOVZXWQ performs "Move Packed Word Integers to Quadword Integers with Zero Extension". +// +// Mnemonic : PMOVZXWQ +// Supported forms : (2 forms) +// +// * PMOVZXWQ xmm, xmm [SSE4.1] +// * PMOVZXWQ m32, xmm [SSE4.1] +// +func (self *Program) PMOVZXWQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVZXWQ", 2, Operands { v0, v1 }) + // PMOVZXWQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x34) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVZXWQ m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x34) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVZXWQ") + } + return p +} + +// PMULDQ performs "Multiply Packed Signed Doubleword Integers and Store Quadword Result". 
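+//
+// Multiplies the signed doublewords in elements 0 and 2 of each operand
+// and stores two signed quadword products; the odd-numbered doublewords
+// are ignored.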
+// +// Mnemonic : PMULDQ +// Supported forms : (2 forms) +// +// * PMULDQ xmm, xmm [SSE4.1] +// * PMULDQ m128, xmm [SSE4.1] +// +func (self *Program) PMULDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMULDQ", 2, Operands { v0, v1 }) + // PMULDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMULDQ") + } + return p +} + +// PMULHRSW performs "Packed Multiply Signed Word Integers and Store High Result with Round and Scale". +// +// Mnemonic : PMULHRSW +// Supported forms : (4 forms) +// +// * PMULHRSW mm, mm [SSSE3] +// * PMULHRSW m64, mm [SSSE3] +// * PMULHRSW xmm, xmm [SSSE3] +// * PMULHRSW m128, xmm [SSSE3] +// +func (self *Program) PMULHRSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMULHRSW", 2, Operands { v0, v1 }) + // PMULHRSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULHRSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x0b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMULHRSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULHRSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x0b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMULHRSW") + } + return p +} + +// PMULHRW performs "Packed Multiply High Rounded Word". +// +// Mnemonic : PMULHRW +// Supported forms : (2 forms) +// +// * PMULHRW mm, mm [3dnow!] +// * PMULHRW m64, mm [3dnow!] 
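+//
+// Note the 3DNow! encoding quirk visible below: the opcode is the escape
+// pair 0x0f 0x0f, and the byte that actually selects the instruction
+// (0xb7 here) is emitted after the ModRM/SIB/displacement bytes.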
+// +func (self *Program) PMULHRW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMULHRW", 2, Operands { v0, v1 }) + // PMULHRW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xb7) + }) + } + // PMULHRW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xb7) + }) + } + if p.len == 0 { + panic("invalid operands for PMULHRW") + } + return p +} + +// PMULHUW performs "Multiply Packed Unsigned Word Integers and Store High Result". +// +// Mnemonic : PMULHUW +// Supported forms : (4 forms) +// +// * PMULHUW mm, mm [MMX+] +// * PMULHUW m64, mm [MMX+] +// * PMULHUW xmm, xmm [SSE2] +// * PMULHUW m128, xmm [SSE2] +// +func (self *Program) PMULHUW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMULHUW", 2, Operands { v0, v1 }) + // PMULHUW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULHUW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe4) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMULHUW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULHUW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe4) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMULHUW") + } + return p +} + +// PMULHW performs "Multiply Packed Signed Word Integers and Store High Result". 
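+//
+// Keeps bits 31..16 of each 16 x 16-bit signed product. Usage sketch
+// (editorial addition, MMX constants assumed): p.PMULHW(MM2, MM3) emits
+// 0f e5 da.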
+// +// Mnemonic : PMULHW +// Supported forms : (4 forms) +// +// * PMULHW mm, mm [MMX] +// * PMULHW m64, mm [MMX] +// * PMULHW xmm, xmm [SSE2] +// * PMULHW m128, xmm [SSE2] +// +func (self *Program) PMULHW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMULHW", 2, Operands { v0, v1 }) + // PMULHW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULHW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe5) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMULHW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULHW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe5) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMULHW") + } + return p +} + +// PMULLD performs "Multiply Packed Signed Doubleword Integers and Store Low Result". +// +// Mnemonic : PMULLD +// Supported forms : (2 forms) +// +// * PMULLD xmm, xmm [SSE4.1] +// * PMULLD m128, xmm [SSE4.1] +// +func (self *Program) PMULLD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMULLD", 2, Operands { v0, v1 }) + // PMULLD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x40) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULLD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x40) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMULLD") + } + return p +} + +// PMULLW performs "Multiply Packed Signed Word Integers and Store Low Result". 
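+//
+// Low-half counterpart of PMULHW: issuing PMULLW and PMULHW on the same
+// operands yields the full 32-bit signed products split across two
+// results.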
+// +// Mnemonic : PMULLW +// Supported forms : (4 forms) +// +// * PMULLW mm, mm [MMX] +// * PMULLW m64, mm [MMX] +// * PMULLW xmm, xmm [SSE2] +// * PMULLW m128, xmm [SSE2] +// +func (self *Program) PMULLW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMULLW", 2, Operands { v0, v1 }) + // PMULLW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULLW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd5) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMULLW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULLW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd5) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMULLW") + } + return p +} + +// PMULUDQ performs "Multiply Packed Unsigned Doubleword Integers". +// +// Mnemonic : PMULUDQ +// Supported forms : (4 forms) +// +// * PMULUDQ mm, mm [SSE2] +// * PMULUDQ m64, mm [SSE2] +// * PMULUDQ xmm, xmm [SSE2] +// * PMULUDQ m128, xmm [SSE2] +// +func (self *Program) PMULUDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMULUDQ", 2, Operands { v0, v1 }) + // PMULUDQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULUDQ m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf4) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMULUDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULUDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf4) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMULUDQ") + } + return p +} + +// POPCNTL performs "Count of Number of Bits Set to 1". 
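+//
+// POPCNT requires a mandatory 0xf3 prefix, emitted first by every encoder
+// below. Usage sketch (editorial addition, GPR constants assumed):
+// p.POPCNTL(ECX, EAX) emits f3 0f b8 c1.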
+// +// Mnemonic : POPCNT +// Supported forms : (2 forms) +// +// * POPCNTL r32, r32 [POPCNT] +// * POPCNTL m32, r32 [POPCNT] +// +func (self *Program) POPCNTL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("POPCNTL", 2, Operands { v0, v1 }) + // POPCNTL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_POPCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // POPCNTL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_POPCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xb8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for POPCNTL") + } + return p +} + +// POPCNTQ performs "Count of Number of Bits Set to 1". +// +// Mnemonic : POPCNT +// Supported forms : (2 forms) +// +// * POPCNTQ r64, r64 [POPCNT] +// * POPCNTQ m64, r64 [POPCNT] +// +func (self *Program) POPCNTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("POPCNTQ", 2, Operands { v0, v1 }) + // POPCNTQ r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_POPCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // POPCNTQ m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_POPCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xb8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for POPCNTQ") + } + return p +} + +// POPCNTW performs "Count of Number of Bits Set to 1". +// +// Mnemonic : POPCNT +// Supported forms : (2 forms) +// +// * POPCNTW r16, r16 [POPCNT] +// * POPCNTW m16, r16 [POPCNT] +// +func (self *Program) POPCNTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("POPCNTW", 2, Operands { v0, v1 }) + // POPCNTW r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_POPCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // POPCNTW m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_POPCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xb8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for POPCNTW") + } + return p +} + +// POPQ performs "Pop a Value from the Stack". 
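+//
+// For POPQ r64 the generator registers two candidate encodings via
+// p.add(): the short form 0x58+rd and the general 0x8f /0 form, leaving
+// the choice to the instruction-selection pass (presumably the shorter
+// one, e.g. a single 0x5b byte for POPQ RBX).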
+// +// Mnemonic : POP +// Supported forms : (2 forms) +// +// * POPQ r64 +// * POPQ m64 +// +func (self *Program) POPQ(v0 interface{}) *Instruction { + p := self.alloc("POPQ", 1, Operands { v0 }) + // POPQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0x58 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0x8f) + m.emit(0xc0 | lcode(v[0])) + }) + } + // POPQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x8f) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for POPQ") + } + return p +} + +// POPW performs "Pop a Value from the Stack". +// +// Mnemonic : POP +// Supported forms : (2 forms) +// +// * POPW r16 +// * POPW m16 +// +func (self *Program) POPW(v0 interface{}) *Instruction { + p := self.alloc("POPW", 1, Operands { v0 }) + // POPW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0x58 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0x8f) + m.emit(0xc0 | lcode(v[0])) + }) + } + // POPW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0x8f) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for POPW") + } + return p +} + +// POR performs "Packed Bitwise Logical OR". +// +// Mnemonic : POR +// Supported forms : (4 forms) +// +// * POR mm, mm [MMX] +// * POR m64, mm [MMX] +// * POR xmm, xmm [SSE2] +// * POR m128, xmm [SSE2] +// +func (self *Program) POR(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("POR", 2, Operands { v0, v1 }) + // POR mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // POR m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xeb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // POR xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // POR m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xeb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for POR") + } + return p +} + +// PREFETCH performs "Prefetch Data into Caches". 
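+//
+// Across the prefetch family below the variant is selected by the ModRM
+// reg field (the first argument to mrsd()), not by the opcode: 0f 0d
+// carries /0 (PREFETCH), /1 (PREFETCHW) and /2 (PREFETCHWT1), while
+// 0f 18 carries /0 (NTA) through /3 (T2).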
+// +// Mnemonic : PREFETCH +// Supported forms : (1 form) +// +// * PREFETCH m8 [PREFETCH] +// +func (self *Program) PREFETCH(v0 interface{}) *Instruction { + p := self.alloc("PREFETCH", 1, Operands { v0 }) + // PREFETCH m8 + if isM8(v0) { + self.require(ISA_PREFETCH) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0d) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PREFETCH") + } + return p +} + +// PREFETCHNTA performs "Prefetch Data Into Caches using NTA Hint". +// +// Mnemonic : PREFETCHNTA +// Supported forms : (1 form) +// +// * PREFETCHNTA m8 [MMX+] +// +func (self *Program) PREFETCHNTA(v0 interface{}) *Instruction { + p := self.alloc("PREFETCHNTA", 1, Operands { v0 }) + // PREFETCHNTA m8 + if isM8(v0) { + self.require(ISA_MMX_PLUS) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x18) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PREFETCHNTA") + } + return p +} + +// PREFETCHT0 performs "Prefetch Data Into Caches using T0 Hint". +// +// Mnemonic : PREFETCHT0 +// Supported forms : (1 form) +// +// * PREFETCHT0 m8 [MMX+] +// +func (self *Program) PREFETCHT0(v0 interface{}) *Instruction { + p := self.alloc("PREFETCHT0", 1, Operands { v0 }) + // PREFETCHT0 m8 + if isM8(v0) { + self.require(ISA_MMX_PLUS) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x18) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PREFETCHT0") + } + return p +} + +// PREFETCHT1 performs "Prefetch Data Into Caches using T1 Hint". +// +// Mnemonic : PREFETCHT1 +// Supported forms : (1 form) +// +// * PREFETCHT1 m8 [MMX+] +// +func (self *Program) PREFETCHT1(v0 interface{}) *Instruction { + p := self.alloc("PREFETCHT1", 1, Operands { v0 }) + // PREFETCHT1 m8 + if isM8(v0) { + self.require(ISA_MMX_PLUS) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x18) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PREFETCHT1") + } + return p +} + +// PREFETCHT2 performs "Prefetch Data Into Caches using T2 Hint". +// +// Mnemonic : PREFETCHT2 +// Supported forms : (1 form) +// +// * PREFETCHT2 m8 [MMX+] +// +func (self *Program) PREFETCHT2(v0 interface{}) *Instruction { + p := self.alloc("PREFETCHT2", 1, Operands { v0 }) + // PREFETCHT2 m8 + if isM8(v0) { + self.require(ISA_MMX_PLUS) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x18) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PREFETCHT2") + } + return p +} + +// PREFETCHW performs "Prefetch Data into Caches in Anticipation of a Write". 
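+//
+// Unlike the 0f 18 hints, which are gated on MMX+ above, PREFETCHW has
+// its own CPUID feature flag (ISA_PREFETCHW below): it began as an AMD
+// extension and was only later adopted by Intel.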
+// +// Mnemonic : PREFETCHW +// Supported forms : (1 form) +// +// * PREFETCHW m8 [PREFETCHW] +// +func (self *Program) PREFETCHW(v0 interface{}) *Instruction { + p := self.alloc("PREFETCHW", 1, Operands { v0 }) + // PREFETCHW m8 + if isM8(v0) { + self.require(ISA_PREFETCHW) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0d) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PREFETCHW") + } + return p +} + +// PREFETCHWT1 performs "Prefetch Vector Data Into Caches with Intent to Write and T1 Hint". +// +// Mnemonic : PREFETCHWT1 +// Supported forms : (1 form) +// +// * PREFETCHWT1 m8 [PREFETCHWT1] +// +func (self *Program) PREFETCHWT1(v0 interface{}) *Instruction { + p := self.alloc("PREFETCHWT1", 1, Operands { v0 }) + // PREFETCHWT1 m8 + if isM8(v0) { + self.require(ISA_PREFETCHWT1) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0d) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PREFETCHWT1") + } + return p +} + +// PSADBW performs "Compute Sum of Absolute Differences". +// +// Mnemonic : PSADBW +// Supported forms : (4 forms) +// +// * PSADBW mm, mm [MMX+] +// * PSADBW m64, mm [MMX+] +// * PSADBW xmm, xmm [SSE2] +// * PSADBW m128, xmm [SSE2] +// +func (self *Program) PSADBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSADBW", 2, Operands { v0, v1 }) + // PSADBW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSADBW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSADBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSADBW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSADBW") + } + return p +} + +// PSHUFB performs "Packed Shuffle Bytes". 
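+//
+// Each destination byte i is selected by shuffle-control byte i of the
+// source operand (its low 4 bits in the XMM form, low 3 bits in the MMX
+// form); a control byte with the high bit set zeroes that destination
+// byte instead.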
+// +// Mnemonic : PSHUFB +// Supported forms : (4 forms) +// +// * PSHUFB mm, mm [SSSE3] +// * PSHUFB m64, mm [SSSE3] +// * PSHUFB xmm, xmm [SSSE3] +// * PSHUFB m128, xmm [SSSE3] +// +func (self *Program) PSHUFB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSHUFB", 2, Operands { v0, v1 }) + // PSHUFB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x00) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSHUFB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x00) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSHUFB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x00) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSHUFB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x00) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSHUFB") + } + return p +} + +// PSHUFD performs "Shuffle Packed Doublewords". +// +// Mnemonic : PSHUFD +// Supported forms : (2 forms) +// +// * PSHUFD imm8, xmm, xmm [SSE2] +// * PSHUFD imm8, m128, xmm [SSE2] +// +func (self *Program) PSHUFD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PSHUFD", 3, Operands { v0, v1, v2 }) + // PSHUFD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSHUFD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PSHUFD") + } + return p +} + +// PSHUFHW performs "Shuffle Packed High Words". 
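+//
+// Shuffles only the high four words; the low quadword passes through
+// unchanged. The three word/doubleword shuffles share opcode 0f 70 and
+// differ purely in mandatory prefix: 0x66 (PSHUFD above), 0xf3 (this
+// instruction) and 0xf2 (PSHUFLW below). Note this package's operand
+// order puts the imm8 selector first, e.g. p.PSHUFHW(0x1b, XMM1, XMM0).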
+// +// Mnemonic : PSHUFHW +// Supported forms : (2 forms) +// +// * PSHUFHW imm8, xmm, xmm [SSE2] +// * PSHUFHW imm8, m128, xmm [SSE2] +// +func (self *Program) PSHUFHW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PSHUFHW", 3, Operands { v0, v1, v2 }) + // PSHUFHW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSHUFHW imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PSHUFHW") + } + return p +} + +// PSHUFLW performs "Shuffle Packed Low Words". +// +// Mnemonic : PSHUFLW +// Supported forms : (2 forms) +// +// * PSHUFLW imm8, xmm, xmm [SSE2] +// * PSHUFLW imm8, m128, xmm [SSE2] +// +func (self *Program) PSHUFLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PSHUFLW", 3, Operands { v0, v1, v2 }) + // PSHUFLW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSHUFLW imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PSHUFLW") + } + return p +} + +// PSHUFW performs "Shuffle Packed Words". +// +// Mnemonic : PSHUFW +// Supported forms : (2 forms) +// +// * PSHUFW imm8, mm, mm [MMX+] +// * PSHUFW imm8, m64, mm [MMX+] +// +func (self *Program) PSHUFW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PSHUFW", 3, Operands { v0, v1, v2 }) + // PSHUFW imm8, mm, mm + if isImm8(v0) && isMM(v1) && isMM(v2) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSHUFW imm8, m64, mm + if isImm8(v0) && isM64(v1) && isMM(v2) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PSHUFW") + } + return p +} + +// PSIGNB performs "Packed Sign of Byte Integers". 
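+//
+// Each destination byte is negated, zeroed or kept unchanged according
+// to whether the matching source byte is negative, zero or positive.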
+// +// Mnemonic : PSIGNB +// Supported forms : (4 forms) +// +// * PSIGNB mm, mm [SSSE3] +// * PSIGNB m64, mm [SSSE3] +// * PSIGNB xmm, xmm [SSSE3] +// * PSIGNB m128, xmm [SSSE3] +// +func (self *Program) PSIGNB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSIGNB", 2, Operands { v0, v1 }) + // PSIGNB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x08) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSIGNB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x08) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSIGNB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x08) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSIGNB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x08) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSIGNB") + } + return p +} + +// PSIGND performs "Packed Sign of Doubleword Integers". +// +// Mnemonic : PSIGND +// Supported forms : (4 forms) +// +// * PSIGND mm, mm [SSSE3] +// * PSIGND m64, mm [SSSE3] +// * PSIGND xmm, xmm [SSSE3] +// * PSIGND m128, xmm [SSSE3] +// +func (self *Program) PSIGND(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSIGND", 2, Operands { v0, v1 }) + // PSIGND mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSIGND m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x0a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSIGND xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSIGND m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x0a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSIGND") + } + return p +} + +// PSIGNW performs "Packed Sign of Word Integers". 
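+//
+// The three PSIGN variants differ only in the final opcode byte: 0x08
+// (B), 0x09 (W, this instruction) and 0x0a (D), as the encoders in this
+// group show.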
+// +// Mnemonic : PSIGNW +// Supported forms : (4 forms) +// +// * PSIGNW mm, mm [SSSE3] +// * PSIGNW m64, mm [SSSE3] +// * PSIGNW xmm, xmm [SSSE3] +// * PSIGNW m128, xmm [SSSE3] +// +func (self *Program) PSIGNW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSIGNW", 2, Operands { v0, v1 }) + // PSIGNW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x09) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSIGNW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x09) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSIGNW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x09) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSIGNW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x09) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSIGNW") + } + return p +} + +// PSLLD performs "Shift Packed Doubleword Data Left Logical". +// +// Mnemonic : PSLLD +// Supported forms : (6 forms) +// +// * PSLLD imm8, mm [MMX] +// * PSLLD mm, mm [MMX] +// * PSLLD m64, mm [MMX] +// * PSLLD imm8, xmm [SSE2] +// * PSLLD xmm, xmm [SSE2] +// * PSLLD m128, xmm [SSE2] +// +func (self *Program) PSLLD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSLLD", 2, Operands { v0, v1 }) + // PSLLD imm8, mm + if isImm8(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x72) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSLLD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSLLD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSLLD imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x72) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSLLD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSLLD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), 
false) + m.emit(0x0f) + m.emit(0xf2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSLLD") + } + return p +} + +// PSLLDQ performs "Shift Packed Double Quadword Left Logical". +// +// Mnemonic : PSLLDQ +// Supported forms : (1 form) +// +// * PSLLDQ imm8, xmm [SSE2] +// +func (self *Program) PSLLDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSLLDQ", 2, Operands { v0, v1 }) + // PSLLDQ imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x73) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PSLLDQ") + } + return p +} + +// PSLLQ performs "Shift Packed Quadword Data Left Logical". +// +// Mnemonic : PSLLQ +// Supported forms : (6 forms) +// +// * PSLLQ imm8, mm [MMX] +// * PSLLQ mm, mm [MMX] +// * PSLLQ m64, mm [MMX] +// * PSLLQ imm8, xmm [SSE2] +// * PSLLQ xmm, xmm [SSE2] +// * PSLLQ m128, xmm [SSE2] +// +func (self *Program) PSLLQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSLLQ", 2, Operands { v0, v1 }) + // PSLLQ imm8, mm + if isImm8(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x73) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSLLQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSLLQ m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSLLQ imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x73) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSLLQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSLLQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSLLQ") + } + return p +} + +// PSLLW performs "Shift Packed Word Data Left Logical". 
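+//
+// Like the other MMX/SSE shifts this accepts three count kinds: an imm8
+// (group opcode 0f 71 /6, i.e. ModRM 0xf0 | reg below), a count register,
+// or a count in memory. Counts above 15 zero the destination. Sketch
+// (editorial addition, XMM constants assumed): p.PSLLW(4, XMM2) emits
+// 66 0f 71 f2 04.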
+// +// Mnemonic : PSLLW +// Supported forms : (6 forms) +// +// * PSLLW imm8, mm [MMX] +// * PSLLW mm, mm [MMX] +// * PSLLW m64, mm [MMX] +// * PSLLW imm8, xmm [SSE2] +// * PSLLW xmm, xmm [SSE2] +// * PSLLW m128, xmm [SSE2] +// +func (self *Program) PSLLW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSLLW", 2, Operands { v0, v1 }) + // PSLLW imm8, mm + if isImm8(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x71) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSLLW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSLLW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSLLW imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x71) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSLLW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSLLW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSLLW") + } + return p +} + +// PSRAD performs "Shift Packed Doubleword Data Right Arithmetic". 
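+//
+// A hypothetical sketch (p and XMM1 assumed as elsewhere): shifting by 31
+// turns every doubleword into its sign mask, a common SIMD idiom.
+//
+//     p.PSRAD(31, XMM1)   // each dword becomes 0 or 0xffffffff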
+// +// Mnemonic : PSRAD +// Supported forms : (6 forms) +// +// * PSRAD imm8, mm [MMX] +// * PSRAD mm, mm [MMX] +// * PSRAD m64, mm [MMX] +// * PSRAD imm8, xmm [SSE2] +// * PSRAD xmm, xmm [SSE2] +// * PSRAD m128, xmm [SSE2] +// +func (self *Program) PSRAD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSRAD", 2, Operands { v0, v1 }) + // PSRAD imm8, mm + if isImm8(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRAD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRAD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSRAD imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRAD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRAD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSRAD") + } + return p +} + +// PSRAW performs "Shift Packed Word Data Right Arithmetic". 
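+//
+// A hypothetical sketch (p and MM0 assumed):
+//
+//     p.PSRAW(15, MM0)    // each word becomes its sign mask (0 or 0xffff)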
+// +// Mnemonic : PSRAW +// Supported forms : (6 forms) +// +// * PSRAW imm8, mm [MMX] +// * PSRAW mm, mm [MMX] +// * PSRAW m64, mm [MMX] +// * PSRAW imm8, xmm [SSE2] +// * PSRAW xmm, xmm [SSE2] +// * PSRAW m128, xmm [SSE2] +// +func (self *Program) PSRAW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSRAW", 2, Operands { v0, v1 }) + // PSRAW imm8, mm + if isImm8(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x71) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRAW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRAW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSRAW imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x71) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRAW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRAW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSRAW") + } + return p +} + +// PSRLD performs "Shift Packed Doubleword Data Right Logical". 
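+//
+// A hypothetical sketch (p and XMM2 assumed):
+//
+//     p.PSRLD(1, XMM2)    // logical shift: halves each dword as unsigned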
+// +// Mnemonic : PSRLD +// Supported forms : (6 forms) +// +// * PSRLD imm8, mm [MMX] +// * PSRLD mm, mm [MMX] +// * PSRLD m64, mm [MMX] +// * PSRLD imm8, xmm [SSE2] +// * PSRLD xmm, xmm [SSE2] +// * PSRLD m128, xmm [SSE2] +// +func (self *Program) PSRLD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSRLD", 2, Operands { v0, v1 }) + // PSRLD imm8, mm + if isImm8(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x72) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRLD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRLD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSRLD imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x72) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRLD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRLD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSRLD") + } + return p +} + +// PSRLDQ performs "Shift Packed Double Quadword Right Logical". +// +// Mnemonic : PSRLDQ +// Supported forms : (1 form) +// +// * PSRLDQ imm8, xmm [SSE2] +// +func (self *Program) PSRLDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSRLDQ", 2, Operands { v0, v1 }) + // PSRLDQ imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x73) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PSRLDQ") + } + return p +} + +// PSRLQ performs "Shift Packed Quadword Data Right Logical". 
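+//
+// A hypothetical sketch (p and XMM3 assumed):
+//
+//     p.PSRLQ(32, XMM3)   // low dword of each qword = old high dword; high dword = 0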
+// +// Mnemonic : PSRLQ +// Supported forms : (6 forms) +// +// * PSRLQ imm8, mm [MMX] +// * PSRLQ mm, mm [MMX] +// * PSRLQ m64, mm [MMX] +// * PSRLQ imm8, xmm [SSE2] +// * PSRLQ xmm, xmm [SSE2] +// * PSRLQ m128, xmm [SSE2] +// +func (self *Program) PSRLQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSRLQ", 2, Operands { v0, v1 }) + // PSRLQ imm8, mm + if isImm8(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x73) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRLQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRLQ m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSRLQ imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x73) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRLQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRLQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSRLQ") + } + return p +} + +// PSRLW performs "Shift Packed Word Data Right Logical". 
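+//
+// A hypothetical sketch (p and XMM0 assumed):
+//
+//     p.PSRLW(8, XMM0)    // drops the low byte of each word, zero-filling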
+// +// Mnemonic : PSRLW +// Supported forms : (6 forms) +// +// * PSRLW imm8, mm [MMX] +// * PSRLW mm, mm [MMX] +// * PSRLW m64, mm [MMX] +// * PSRLW imm8, xmm [SSE2] +// * PSRLW xmm, xmm [SSE2] +// * PSRLW m128, xmm [SSE2] +// +func (self *Program) PSRLW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSRLW", 2, Operands { v0, v1 }) + // PSRLW imm8, mm + if isImm8(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x71) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRLW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRLW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSRLW imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x71) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRLW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRLW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSRLW") + } + return p +} + +// PSUBB performs "Subtract Packed Byte Integers". 
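+//
+// A hypothetical sketch (p, XMM0 and XMM1 assumed; byte lanes wrap on
+// overflow, unlike the saturating PSUBSB/PSUBUSB below):
+//
+//     p.PSUBB(XMM1, XMM0) // xmm0[i] -= xmm1[i] for all 16 bytes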
+// +// Mnemonic : PSUBB +// Supported forms : (4 forms) +// +// * PSUBB mm, mm [MMX] +// * PSUBB m64, mm [MMX] +// * PSUBB xmm, xmm [SSE2] +// * PSUBB m128, xmm [SSE2] +// +func (self *Program) PSUBB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSUBB", 2, Operands { v0, v1 }) + // PSUBB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSUBB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSUBB") + } + return p +} + +// PSUBD performs "Subtract Packed Doubleword Integers". +// +// Mnemonic : PSUBD +// Supported forms : (4 forms) +// +// * PSUBD mm, mm [MMX] +// * PSUBD m64, mm [MMX] +// * PSUBD xmm, xmm [SSE2] +// * PSUBD m128, xmm [SSE2] +// +func (self *Program) PSUBD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSUBD", 2, Operands { v0, v1 }) + // PSUBD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfa) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfa) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSUBD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfa) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfa) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSUBD") + } + return p +} + +// PSUBQ performs "Subtract Packed Quadword Integers". 
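+//
+// A hypothetical sketch (p, MM0 and MM1 assumed; note that even the mm
+// forms require SSE2 rather than plain MMX):
+//
+//     p.PSUBQ(MM1, MM0)   // mm0 -= mm1 as one 64-bit integer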
+// +// Mnemonic : PSUBQ +// Supported forms : (4 forms) +// +// * PSUBQ mm, mm [SSE2] +// * PSUBQ m64, mm [SSE2] +// * PSUBQ xmm, xmm [SSE2] +// * PSUBQ m128, xmm [SSE2] +// +func (self *Program) PSUBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSUBQ", 2, Operands { v0, v1 }) + // PSUBQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBQ m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSUBQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSUBQ") + } + return p +} + +// PSUBSB performs "Subtract Packed Signed Byte Integers with Signed Saturation". +// +// Mnemonic : PSUBSB +// Supported forms : (4 forms) +// +// * PSUBSB mm, mm [MMX] +// * PSUBSB m64, mm [MMX] +// * PSUBSB xmm, xmm [SSE2] +// * PSUBSB m128, xmm [SSE2] +// +func (self *Program) PSUBSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSUBSB", 2, Operands { v0, v1 }) + // PSUBSB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBSB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSUBSB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBSB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSUBSB") + } + return p +} + +// PSUBSW performs "Subtract Packed Signed Word Integers with Signed Saturation". 
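+//
+// A hypothetical sketch (p, XMM1 and XMM2 assumed; results are clamped
+// to the signed word range [-32768, 32767]):
+//
+//     p.PSUBSW(XMM2, XMM1)    // xmm1 -= xmm2, lane-wise with saturation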
+// +// Mnemonic : PSUBSW +// Supported forms : (4 forms) +// +// * PSUBSW mm, mm [MMX] +// * PSUBSW m64, mm [MMX] +// * PSUBSW xmm, xmm [SSE2] +// * PSUBSW m128, xmm [SSE2] +// +func (self *Program) PSUBSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSUBSW", 2, Operands { v0, v1 }) + // PSUBSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe9) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe9) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSUBSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe9) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe9) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSUBSW") + } + return p +} + +// PSUBUSB performs "Subtract Packed Unsigned Byte Integers with Unsigned Saturation". +// +// Mnemonic : PSUBUSB +// Supported forms : (4 forms) +// +// * PSUBUSB mm, mm [MMX] +// * PSUBUSB m64, mm [MMX] +// * PSUBUSB xmm, xmm [SSE2] +// * PSUBUSB m128, xmm [SSE2] +// +func (self *Program) PSUBUSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSUBUSB", 2, Operands { v0, v1 }) + // PSUBUSB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBUSB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSUBUSB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBUSB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSUBUSB") + } + return p +} + +// PSUBUSW performs "Subtract Packed Unsigned Word Integers with Unsigned Saturation". 
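+//
+// A hypothetical sketch (p, XMM0 and XMM1 assumed; results clamp at
+// zero, which makes this a cheap per-word max(a-b, 0)):
+//
+//     p.PSUBUSW(XMM1, XMM0)   // xmm0 -= xmm1, unsigned saturating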
+// +// Mnemonic : PSUBUSW +// Supported forms : (4 forms) +// +// * PSUBUSW mm, mm [MMX] +// * PSUBUSW m64, mm [MMX] +// * PSUBUSW xmm, xmm [SSE2] +// * PSUBUSW m128, xmm [SSE2] +// +func (self *Program) PSUBUSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSUBUSW", 2, Operands { v0, v1 }) + // PSUBUSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd9) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBUSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd9) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSUBUSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd9) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBUSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd9) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSUBUSW") + } + return p +} + +// PSUBW performs "Subtract Packed Word Integers". +// +// Mnemonic : PSUBW +// Supported forms : (4 forms) +// +// * PSUBW mm, mm [MMX] +// * PSUBW m64, mm [MMX] +// * PSUBW xmm, xmm [SSE2] +// * PSUBW m128, xmm [SSE2] +// +func (self *Program) PSUBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSUBW", 2, Operands { v0, v1 }) + // PSUBW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf9) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf9) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSUBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf9) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf9) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSUBW") + } + return p +} + +// PSWAPD performs "Packed Swap Doubleword". 
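+//
+// A hypothetical sketch (p, MM0 and MM1 assumed; 3DNow!+ hardware only):
+//
+//     p.PSWAPD(MM1, MM0)  // mm0 = {low: mm1.hi32, high: mm1.lo32}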
+// +// Mnemonic : PSWAPD +// Supported forms : (2 forms) +// +// * PSWAPD mm, mm [3dnow!+] +// * PSWAPD m64, mm [3dnow!+] +// +func (self *Program) PSWAPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSWAPD", 2, Operands { v0, v1 }) + // PSWAPD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xbb) + }) + } + // PSWAPD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xbb) + }) + } + if p.len == 0 { + panic("invalid operands for PSWAPD") + } + return p +} + +// PTEST performs "Packed Logical Compare". +// +// Mnemonic : PTEST +// Supported forms : (2 forms) +// +// * PTEST xmm, xmm [SSE4.1] +// * PTEST m128, xmm [SSE4.1] +// +func (self *Program) PTEST(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PTEST", 2, Operands { v0, v1 }) + // PTEST xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x17) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PTEST m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x17) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PTEST") + } + return p +} + +// PUNPCKHBW performs "Unpack and Interleave High-Order Bytes into Words". 
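+//
+// A hypothetical sketch (p, XMM0 and XMM1 assumed):
+//
+//     p.PUNPCKHBW(XMM1, XMM0) // interleave the high 8 bytes of xmm0 with xmm1's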
+// +// Mnemonic : PUNPCKHBW +// Supported forms : (4 forms) +// +// * PUNPCKHBW mm, mm [MMX] +// * PUNPCKHBW m64, mm [MMX] +// * PUNPCKHBW xmm, xmm [SSE2] +// * PUNPCKHBW m128, xmm [SSE2] +// +func (self *Program) PUNPCKHBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PUNPCKHBW", 2, Operands { v0, v1 }) + // PUNPCKHBW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x68) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKHBW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x68) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PUNPCKHBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x68) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKHBW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x68) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUNPCKHBW") + } + return p +} + +// PUNPCKHDQ performs "Unpack and Interleave High-Order Doublewords into Quadwords". +// +// Mnemonic : PUNPCKHDQ +// Supported forms : (4 forms) +// +// * PUNPCKHDQ mm, mm [MMX] +// * PUNPCKHDQ m64, mm [MMX] +// * PUNPCKHDQ xmm, xmm [SSE2] +// * PUNPCKHDQ m128, xmm [SSE2] +// +func (self *Program) PUNPCKHDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PUNPCKHDQ", 2, Operands { v0, v1 }) + // PUNPCKHDQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKHDQ m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PUNPCKHDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKHDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUNPCKHDQ") + } + return p +} + +// PUNPCKHQDQ performs "Unpack and Interleave High-Order Quadwords into Double Quadwords". 
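+//
+// A hypothetical sketch (p and XMM0 assumed); with both operands equal
+// this broadcasts the high quadword into both lanes:
+//
+//     p.PUNPCKHQDQ(XMM0, XMM0)    // xmm0 = {hi64, hi64}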
+// +// Mnemonic : PUNPCKHQDQ +// Supported forms : (2 forms) +// +// * PUNPCKHQDQ xmm, xmm [SSE2] +// * PUNPCKHQDQ m128, xmm [SSE2] +// +func (self *Program) PUNPCKHQDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PUNPCKHQDQ", 2, Operands { v0, v1 }) + // PUNPCKHQDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKHQDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUNPCKHQDQ") + } + return p +} + +// PUNPCKHWD performs "Unpack and Interleave High-Order Words into Doublewords". +// +// Mnemonic : PUNPCKHWD +// Supported forms : (4 forms) +// +// * PUNPCKHWD mm, mm [MMX] +// * PUNPCKHWD m64, mm [MMX] +// * PUNPCKHWD xmm, xmm [SSE2] +// * PUNPCKHWD m128, xmm [SSE2] +// +func (self *Program) PUNPCKHWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PUNPCKHWD", 2, Operands { v0, v1 }) + // PUNPCKHWD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x69) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKHWD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x69) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PUNPCKHWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x69) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKHWD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x69) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUNPCKHWD") + } + return p +} + +// PUNPCKLBW performs "Unpack and Interleave Low-Order Bytes into Words". 
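+//
+// A hypothetical sketch (p, XMM0 and XMM1 assumed; note the mm memory
+// form above reads only 32 bits):
+//
+//     p.PUNPCKLBW(XMM1, XMM0) // interleave the low 8 bytes of xmm0 with xmm1's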
+// +// Mnemonic : PUNPCKLBW +// Supported forms : (4 forms) +// +// * PUNPCKLBW mm, mm [MMX] +// * PUNPCKLBW m32, mm [MMX] +// * PUNPCKLBW xmm, xmm [SSE2] +// * PUNPCKLBW m128, xmm [SSE2] +// +func (self *Program) PUNPCKLBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PUNPCKLBW", 2, Operands { v0, v1 }) + // PUNPCKLBW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x60) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKLBW m32, mm + if isM32(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x60) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PUNPCKLBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x60) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKLBW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x60) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUNPCKLBW") + } + return p +} + +// PUNPCKLDQ performs "Unpack and Interleave Low-Order Doublewords into Quadwords". +// +// Mnemonic : PUNPCKLDQ +// Supported forms : (4 forms) +// +// * PUNPCKLDQ mm, mm [MMX] +// * PUNPCKLDQ m32, mm [MMX] +// * PUNPCKLDQ xmm, xmm [SSE2] +// * PUNPCKLDQ m128, xmm [SSE2] +// +func (self *Program) PUNPCKLDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PUNPCKLDQ", 2, Operands { v0, v1 }) + // PUNPCKLDQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x62) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKLDQ m32, mm + if isM32(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x62) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PUNPCKLDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x62) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKLDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x62) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUNPCKLDQ") + } + return p +} + +// PUNPCKLQDQ performs "Unpack and Interleave Low-Order Quadwords into Double Quadwords". 
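+//
+// A hypothetical sketch (p and XMM4 assumed); the same-register trick
+// duplicates the low quadword:
+//
+//     p.PUNPCKLQDQ(XMM4, XMM4)    // xmm4 = {lo64, lo64}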
+// +// Mnemonic : PUNPCKLQDQ +// Supported forms : (2 forms) +// +// * PUNPCKLQDQ xmm, xmm [SSE2] +// * PUNPCKLQDQ m128, xmm [SSE2] +// +func (self *Program) PUNPCKLQDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PUNPCKLQDQ", 2, Operands { v0, v1 }) + // PUNPCKLQDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKLQDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUNPCKLQDQ") + } + return p +} + +// PUNPCKLWD performs "Unpack and Interleave Low-Order Words into Doublewords". +// +// Mnemonic : PUNPCKLWD +// Supported forms : (4 forms) +// +// * PUNPCKLWD mm, mm [MMX] +// * PUNPCKLWD m32, mm [MMX] +// * PUNPCKLWD xmm, xmm [SSE2] +// * PUNPCKLWD m128, xmm [SSE2] +// +func (self *Program) PUNPCKLWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PUNPCKLWD", 2, Operands { v0, v1 }) + // PUNPCKLWD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x61) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKLWD m32, mm + if isM32(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x61) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PUNPCKLWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x61) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKLWD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x61) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUNPCKLWD") + } + return p +} + +// PUSHQ performs "Push Value Onto the Stack". 
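+//
+// A hypothetical sketch (p and RAX assumed):
+//
+//     p.PUSHQ(RAX)    // push rax; imm8/imm32/m64 forms are accepted too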
+// +// Mnemonic : PUSH +// Supported forms : (4 forms) +// +// * PUSHQ imm8 +// * PUSHQ imm32 +// * PUSHQ r64 +// * PUSHQ m64 +// +func (self *Program) PUSHQ(v0 interface{}) *Instruction { + p := self.alloc("PUSHQ", 1, Operands { v0 }) + // PUSHQ imm8 + if isImm8Ext(v0, 8) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x6a) + m.imm1(toImmAny(v[0])) + }) + } + // PUSHQ imm32 + if isImm32Ext(v0, 8) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x68) + m.imm4(toImmAny(v[0])) + }) + } + // PUSHQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0x50 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xff) + m.emit(0xf0 | lcode(v[0])) + }) + } + // PUSHQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xff) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUSHQ") + } + return p +} + +// PUSHW performs "Push Value Onto the Stack". +// +// Mnemonic : PUSH +// Supported forms : (2 forms) +// +// * PUSHW r16 +// * PUSHW m16 +// +func (self *Program) PUSHW(v0 interface{}) *Instruction { + p := self.alloc("PUSHW", 1, Operands { v0 }) + // PUSHW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0x50 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xff) + m.emit(0xf0 | lcode(v[0])) + }) + } + // PUSHW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xff) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUSHW") + } + return p +} + +// PXOR performs "Packed Bitwise Logical Exclusive OR". 
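+//
+// A hypothetical sketch (p and XMM0 assumed); xor-with-self is the
+// idiomatic way to zero a vector register:
+//
+//     p.PXOR(XMM0, XMM0)  // xmm0 = 0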
+// +// Mnemonic : PXOR +// Supported forms : (4 forms) +// +// * PXOR mm, mm [MMX] +// * PXOR m64, mm [MMX] +// * PXOR xmm, xmm [SSE2] +// * PXOR m128, xmm [SSE2] +// +func (self *Program) PXOR(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PXOR", 2, Operands { v0, v1 }) + // PXOR mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xef) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PXOR m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xef) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PXOR xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xef) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PXOR m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xef) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PXOR") + } + return p +} + +// RCLB performs "Rotate Left through Carry Flag". +// +// Mnemonic : RCL +// Supported forms : (6 forms) +// +// * RCLB 1, r8 +// * RCLB imm8, r8 +// * RCLB cl, r8 +// * RCLB 1, m8 +// * RCLB imm8, m8 +// * RCLB cl, m8 +// +func (self *Program) RCLB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCLB", 2, Operands { v0, v1 }) + // RCLB 1, r8 + if isConst1(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd0) + m.emit(0xd0 | lcode(v[1])) + }) + } + // RCLB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc0) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RCLB cl, r8 + if v0 == CL && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd2) + m.emit(0xd0 | lcode(v[1])) + }) + } + // RCLB 1, m8 + if isConst1(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd0) + m.mrsd(2, addr(v[1]), 1) + }) + } + // RCLB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc0) + m.mrsd(2, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RCLB cl, m8 + if v0 == CL && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd2) + m.mrsd(2, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCLB") + } + return p +} + +// RCLL performs "Rotate Left through Carry Flag". 
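+//
+// A hypothetical sketch (p and EAX assumed; CL is defined by this
+// package); the rotation runs through the carry flag, i.e. 33 bits wide:
+//
+//     p.RCLL(1, EAX)  // count 1 selects the short 0xd1 encoding
+//     p.RCLL(CL, EAX) // variable count taken from cl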
+// +// Mnemonic : RCL +// Supported forms : (6 forms) +// +// * RCLL 1, r32 +// * RCLL imm8, r32 +// * RCLL cl, r32 +// * RCLL 1, m32 +// * RCLL imm8, m32 +// * RCLL cl, m32 +// +func (self *Program) RCLL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCLL", 2, Operands { v0, v1 }) + // RCLL 1, r32 + if isConst1(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xd0 | lcode(v[1])) + }) + } + // RCLL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RCLL cl, r32 + if v0 == CL && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xd0 | lcode(v[1])) + }) + } + // RCLL 1, m32 + if isConst1(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(2, addr(v[1]), 1) + }) + } + // RCLL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(2, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RCLL cl, m32 + if v0 == CL && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(2, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCLL") + } + return p +} + +// RCLQ performs "Rotate Left through Carry Flag". +// +// Mnemonic : RCL +// Supported forms : (6 forms) +// +// * RCLQ 1, r64 +// * RCLQ imm8, r64 +// * RCLQ cl, r64 +// * RCLQ 1, m64 +// * RCLQ imm8, m64 +// * RCLQ cl, m64 +// +func (self *Program) RCLQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCLQ", 2, Operands { v0, v1 }) + // RCLQ 1, r64 + if isConst1(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd1) + m.emit(0xd0 | lcode(v[1])) + }) + } + // RCLQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc1) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RCLQ cl, r64 + if v0 == CL && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd3) + m.emit(0xd0 | lcode(v[1])) + }) + } + // RCLQ 1, m64 + if isConst1(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd1) + m.mrsd(2, addr(v[1]), 1) + }) + } + // RCLQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc1) + m.mrsd(2, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RCLQ cl, m64 + if v0 == CL && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd3) + m.mrsd(2, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCLQ") + } + return p +} + +// RCLW performs "Rotate Left through Carry Flag". 
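+//
+// A hypothetical sketch (p and AX assumed); the 0x66 prefix emitted
+// below selects the 16-bit operand size:
+//
+//     p.RCLW(4, AX)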
+// +// Mnemonic : RCL +// Supported forms : (6 forms) +// +// * RCLW 1, r16 +// * RCLW imm8, r16 +// * RCLW cl, r16 +// * RCLW 1, m16 +// * RCLW imm8, m16 +// * RCLW cl, m16 +// +func (self *Program) RCLW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCLW", 2, Operands { v0, v1 }) + // RCLW 1, r16 + if isConst1(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xd0 | lcode(v[1])) + }) + } + // RCLW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RCLW cl, r16 + if v0 == CL && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xd0 | lcode(v[1])) + }) + } + // RCLW 1, m16 + if isConst1(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(2, addr(v[1]), 1) + }) + } + // RCLW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(2, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RCLW cl, m16 + if v0 == CL && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(2, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCLW") + } + return p +} + +// RCPPS performs "Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : RCPPS +// Supported forms : (2 forms) +// +// * RCPPS xmm, xmm [SSE] +// * RCPPS m128, xmm [SSE] +// +func (self *Program) RCPPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCPPS", 2, Operands { v0, v1 }) + // RCPPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x53) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // RCPPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x53) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCPPS") + } + return p +} + +// RCPSS performs "Compute Approximate Reciprocal of Scalar Single-Precision Floating-Point Values". 
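+//
+// A hypothetical sketch (p, XMM0 and XMM1 assumed); the hardware
+// approximation carries roughly 12 bits of precision, so refine with a
+// Newton-Raphson step where accuracy matters:
+//
+//     p.RCPSS(XMM1, XMM0) // xmm0[31:0] ~= 1 / xmm1[31:0]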
+// +// Mnemonic : RCPSS +// Supported forms : (2 forms) +// +// * RCPSS xmm, xmm [SSE] +// * RCPSS m32, xmm [SSE] +// +func (self *Program) RCPSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCPSS", 2, Operands { v0, v1 }) + // RCPSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x53) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // RCPSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x53) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCPSS") + } + return p +} + +// RCRB performs "Rotate Right through Carry Flag". +// +// Mnemonic : RCR +// Supported forms : (6 forms) +// +// * RCRB 1, r8 +// * RCRB imm8, r8 +// * RCRB cl, r8 +// * RCRB 1, m8 +// * RCRB imm8, m8 +// * RCRB cl, m8 +// +func (self *Program) RCRB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCRB", 2, Operands { v0, v1 }) + // RCRB 1, r8 + if isConst1(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd0) + m.emit(0xd8 | lcode(v[1])) + }) + } + // RCRB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc0) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RCRB cl, r8 + if v0 == CL && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd2) + m.emit(0xd8 | lcode(v[1])) + }) + } + // RCRB 1, m8 + if isConst1(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd0) + m.mrsd(3, addr(v[1]), 1) + }) + } + // RCRB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc0) + m.mrsd(3, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RCRB cl, m8 + if v0 == CL && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd2) + m.mrsd(3, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCRB") + } + return p +} + +// RCRL performs "Rotate Right through Carry Flag". 
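+//
+// A hypothetical sketch (p and EDX assumed):
+//
+//     p.RCRL(1, EDX)  // rotate the 33-bit pair {CF, edx} right by one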
+// +// Mnemonic : RCR +// Supported forms : (6 forms) +// +// * RCRL 1, r32 +// * RCRL imm8, r32 +// * RCRL cl, r32 +// * RCRL 1, m32 +// * RCRL imm8, m32 +// * RCRL cl, m32 +// +func (self *Program) RCRL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCRL", 2, Operands { v0, v1 }) + // RCRL 1, r32 + if isConst1(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xd8 | lcode(v[1])) + }) + } + // RCRL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RCRL cl, r32 + if v0 == CL && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xd8 | lcode(v[1])) + }) + } + // RCRL 1, m32 + if isConst1(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(3, addr(v[1]), 1) + }) + } + // RCRL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(3, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RCRL cl, m32 + if v0 == CL && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(3, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCRL") + } + return p +} + +// RCRQ performs "Rotate Right through Carry Flag". +// +// Mnemonic : RCR +// Supported forms : (6 forms) +// +// * RCRQ 1, r64 +// * RCRQ imm8, r64 +// * RCRQ cl, r64 +// * RCRQ 1, m64 +// * RCRQ imm8, m64 +// * RCRQ cl, m64 +// +func (self *Program) RCRQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCRQ", 2, Operands { v0, v1 }) + // RCRQ 1, r64 + if isConst1(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd1) + m.emit(0xd8 | lcode(v[1])) + }) + } + // RCRQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc1) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RCRQ cl, r64 + if v0 == CL && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd3) + m.emit(0xd8 | lcode(v[1])) + }) + } + // RCRQ 1, m64 + if isConst1(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd1) + m.mrsd(3, addr(v[1]), 1) + }) + } + // RCRQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc1) + m.mrsd(3, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RCRQ cl, m64 + if v0 == CL && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd3) + m.mrsd(3, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCRQ") + } + return p +} + +// RCRW performs "Rotate Right through Carry Flag". 
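+//
+// A hypothetical sketch (p and AX assumed; CL is defined by this
+// package; for 16-bit operands the effective count is taken modulo 17):
+//
+//     p.RCRW(CL, AX)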
+// +// Mnemonic : RCR +// Supported forms : (6 forms) +// +// * RCRW 1, r16 +// * RCRW imm8, r16 +// * RCRW cl, r16 +// * RCRW 1, m16 +// * RCRW imm8, m16 +// * RCRW cl, m16 +// +func (self *Program) RCRW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCRW", 2, Operands { v0, v1 }) + // RCRW 1, r16 + if isConst1(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xd8 | lcode(v[1])) + }) + } + // RCRW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RCRW cl, r16 + if v0 == CL && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xd8 | lcode(v[1])) + }) + } + // RCRW 1, m16 + if isConst1(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(3, addr(v[1]), 1) + }) + } + // RCRW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(3, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RCRW cl, m16 + if v0 == CL && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(3, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCRW") + } + return p +} + +// RDRAND performs "Read Random Number". +// +// Mnemonic : RDRAND +// Supported forms : (3 forms) +// +// * RDRAND r16 [RDRAND] +// * RDRAND r32 [RDRAND] +// * RDRAND r64 [RDRAND] +// +func (self *Program) RDRAND(v0 interface{}) *Instruction { + p := self.alloc("RDRAND", 1, Operands { v0 }) + // RDRAND r16 + if isReg16(v0) { + self.require(ISA_RDRAND) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0x0f) + m.emit(0xc7) + m.emit(0xf0 | lcode(v[0])) + }) + } + // RDRAND r32 + if isReg32(v0) { + self.require(ISA_RDRAND) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0x0f) + m.emit(0xc7) + m.emit(0xf0 | lcode(v[0])) + }) + } + // RDRAND r64 + if isReg64(v0) { + self.require(ISA_RDRAND) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xc7) + m.emit(0xf0 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for RDRAND") + } + return p +} + +// RDSEED performs "Read Random SEED". 
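+//
+// Example (usage sketch, same assumptions as above; RDSEED sets CF to 1
+// when a valid seed was returned, so callers normally test CF and retry):
+//
+//     p.RDSEED(RAX)       // rdseed %rax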
+// +// Mnemonic : RDSEED +// Supported forms : (3 forms) +// +// * RDSEED r16 [RDSEED] +// * RDSEED r32 [RDSEED] +// * RDSEED r64 [RDSEED] +// +func (self *Program) RDSEED(v0 interface{}) *Instruction { + p := self.alloc("RDSEED", 1, Operands { v0 }) + // RDSEED r16 + if isReg16(v0) { + self.require(ISA_RDSEED) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0x0f) + m.emit(0xc7) + m.emit(0xf8 | lcode(v[0])) + }) + } + // RDSEED r32 + if isReg32(v0) { + self.require(ISA_RDSEED) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0x0f) + m.emit(0xc7) + m.emit(0xf8 | lcode(v[0])) + }) + } + // RDSEED r64 + if isReg64(v0) { + self.require(ISA_RDSEED) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xc7) + m.emit(0xf8 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for RDSEED") + } + return p +} + +// RDTSC performs "Read Time-Stamp Counter". +// +// Mnemonic : RDTSC +// Supported forms : (1 form) +// +// * RDTSC [RDTSC] +// +func (self *Program) RDTSC() *Instruction { + p := self.alloc("RDTSC", 0, Operands { }) + // RDTSC + self.require(ISA_RDTSC) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x31) + }) + return p +} + +// RDTSCP performs "Read Time-Stamp Counter and Processor ID". +// +// Mnemonic : RDTSCP +// Supported forms : (1 form) +// +// * RDTSCP [RDTSCP] +// +func (self *Program) RDTSCP() *Instruction { + p := self.alloc("RDTSCP", 0, Operands { }) + // RDTSCP + self.require(ISA_RDTSCP) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x01) + m.emit(0xf9) + }) + return p +} + +// RET performs "Return from Procedure". +// +// Mnemonic : RET +// Supported forms : (2 forms) +// +// * RET +// * RET imm16 +// +func (self *Program) RET(vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("RET", 0, Operands { }) + case 1 : p = self.alloc("RET", 1, Operands { vv[0] }) + default : panic("instruction RET takes 0 or 1 operands") + } + // RET + if len(vv) == 0 { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc3) + }) + } + // RET imm16 + if len(vv) == 1 && isImm16(vv[0]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc2) + m.imm2(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for RET") + } + return p +} + +// ROLB performs "Rotate Left". 
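+//
+// Example (usage sketch, same assumptions as above):
+//
+//     p.ROLB(3, AL)       // rolb $3, %al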
+// +// Mnemonic : ROL +// Supported forms : (6 forms) +// +// * ROLB 1, r8 +// * ROLB imm8, r8 +// * ROLB cl, r8 +// * ROLB 1, m8 +// * ROLB imm8, m8 +// * ROLB cl, m8 +// +func (self *Program) ROLB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ROLB", 2, Operands { v0, v1 }) + // ROLB 1, r8 + if isConst1(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd0) + m.emit(0xc0 | lcode(v[1])) + }) + } + // ROLB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc0) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ROLB cl, r8 + if v0 == CL && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[1])) + }) + } + // ROLB 1, m8 + if isConst1(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd0) + m.mrsd(0, addr(v[1]), 1) + }) + } + // ROLB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc0) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ROLB cl, m8 + if v0 == CL && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd2) + m.mrsd(0, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ROLB") + } + return p +} + +// ROLL performs "Rotate Left". +// +// Mnemonic : ROL +// Supported forms : (6 forms) +// +// * ROLL 1, r32 +// * ROLL imm8, r32 +// * ROLL cl, r32 +// * ROLL 1, m32 +// * ROLL imm8, m32 +// * ROLL cl, m32 +// +func (self *Program) ROLL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ROLL", 2, Operands { v0, v1 }) + // ROLL 1, r32 + if isConst1(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[1])) + }) + } + // ROLL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ROLL cl, r32 + if v0 == CL && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[1])) + }) + } + // ROLL 1, m32 + if isConst1(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(0, addr(v[1]), 1) + }) + } + // ROLL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ROLL cl, m32 + if v0 == CL && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(0, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ROLL") + } + return p +} + +// ROLQ performs "Rotate Left". 
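+//
+// Example (usage sketch, same assumptions as above; the REX.W prefix for
+// the 64-bit form is emitted automatically):
+//
+//     p.ROLQ(CL, RDX)     // rolq %cl, %rdx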
+// +// Mnemonic : ROL +// Supported forms : (6 forms) +// +// * ROLQ 1, r64 +// * ROLQ imm8, r64 +// * ROLQ cl, r64 +// * ROLQ 1, m64 +// * ROLQ imm8, m64 +// * ROLQ cl, m64 +// +func (self *Program) ROLQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ROLQ", 2, Operands { v0, v1 }) + // ROLQ 1, r64 + if isConst1(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[1])) + }) + } + // ROLQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc1) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ROLQ cl, r64 + if v0 == CL && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[1])) + }) + } + // ROLQ 1, m64 + if isConst1(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd1) + m.mrsd(0, addr(v[1]), 1) + }) + } + // ROLQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc1) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ROLQ cl, m64 + if v0 == CL && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd3) + m.mrsd(0, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ROLQ") + } + return p +} + +// ROLW performs "Rotate Left". +// +// Mnemonic : ROL +// Supported forms : (6 forms) +// +// * ROLW 1, r16 +// * ROLW imm8, r16 +// * ROLW cl, r16 +// * ROLW 1, m16 +// * ROLW imm8, m16 +// * ROLW cl, m16 +// +func (self *Program) ROLW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ROLW", 2, Operands { v0, v1 }) + // ROLW 1, r16 + if isConst1(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[1])) + }) + } + // ROLW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ROLW cl, r16 + if v0 == CL && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[1])) + }) + } + // ROLW 1, m16 + if isConst1(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(0, addr(v[1]), 1) + }) + } + // ROLW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ROLW cl, m16 + if v0 == CL && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(0, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ROLW") + } + return p +} + +// RORB performs "Rotate Right". 
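+//
+// Example (usage sketch, same assumptions as above):
+//
+//     p.RORB(1, BL)       // rorb $1, %bl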
+// +// Mnemonic : ROR +// Supported forms : (6 forms) +// +// * RORB 1, r8 +// * RORB imm8, r8 +// * RORB cl, r8 +// * RORB 1, m8 +// * RORB imm8, m8 +// * RORB cl, m8 +// +func (self *Program) RORB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RORB", 2, Operands { v0, v1 }) + // RORB 1, r8 + if isConst1(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd0) + m.emit(0xc8 | lcode(v[1])) + }) + } + // RORB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc0) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RORB cl, r8 + if v0 == CL && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd2) + m.emit(0xc8 | lcode(v[1])) + }) + } + // RORB 1, m8 + if isConst1(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd0) + m.mrsd(1, addr(v[1]), 1) + }) + } + // RORB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc0) + m.mrsd(1, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RORB cl, m8 + if v0 == CL && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd2) + m.mrsd(1, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RORB") + } + return p +} + +// RORL performs "Rotate Right". +// +// Mnemonic : ROR +// Supported forms : (6 forms) +// +// * RORL 1, r32 +// * RORL imm8, r32 +// * RORL cl, r32 +// * RORL 1, m32 +// * RORL imm8, m32 +// * RORL cl, m32 +// +func (self *Program) RORL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RORL", 2, Operands { v0, v1 }) + // RORL 1, r32 + if isConst1(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xc8 | lcode(v[1])) + }) + } + // RORL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RORL cl, r32 + if v0 == CL && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xc8 | lcode(v[1])) + }) + } + // RORL 1, m32 + if isConst1(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(1, addr(v[1]), 1) + }) + } + // RORL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(1, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RORL cl, m32 + if v0 == CL && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(1, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RORL") + } + return p +} + +// RORQ performs "Rotate Right". 
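+//
+// Example (usage sketch, same assumptions as above):
+//
+//     p.RORQ(17, RDX)     // rorq $17, %rdx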
+// +// Mnemonic : ROR +// Supported forms : (6 forms) +// +// * RORQ 1, r64 +// * RORQ imm8, r64 +// * RORQ cl, r64 +// * RORQ 1, m64 +// * RORQ imm8, m64 +// * RORQ cl, m64 +// +func (self *Program) RORQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RORQ", 2, Operands { v0, v1 }) + // RORQ 1, r64 + if isConst1(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd1) + m.emit(0xc8 | lcode(v[1])) + }) + } + // RORQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc1) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RORQ cl, r64 + if v0 == CL && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd3) + m.emit(0xc8 | lcode(v[1])) + }) + } + // RORQ 1, m64 + if isConst1(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd1) + m.mrsd(1, addr(v[1]), 1) + }) + } + // RORQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc1) + m.mrsd(1, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RORQ cl, m64 + if v0 == CL && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd3) + m.mrsd(1, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RORQ") + } + return p +} + +// RORW performs "Rotate Right". +// +// Mnemonic : ROR +// Supported forms : (6 forms) +// +// * RORW 1, r16 +// * RORW imm8, r16 +// * RORW cl, r16 +// * RORW 1, m16 +// * RORW imm8, m16 +// * RORW cl, m16 +// +func (self *Program) RORW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RORW", 2, Operands { v0, v1 }) + // RORW 1, r16 + if isConst1(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xc8 | lcode(v[1])) + }) + } + // RORW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RORW cl, r16 + if v0 == CL && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xc8 | lcode(v[1])) + }) + } + // RORW 1, m16 + if isConst1(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(1, addr(v[1]), 1) + }) + } + // RORW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(1, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RORW cl, m16 + if v0 == CL && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(1, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RORW") + } + return p +} + +// RORXL performs "Rotate Right Logical Without Affecting Flags". 
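+//
+// Example (usage sketch, same assumptions as above; RORX is the BMI2
+// three-operand rotate that leaves the flags untouched):
+//
+//     p.RORXL(8, ECX, EDX)    // rorxl $8, %ecx, %edx -- EDX = ECX rotated right by 8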
+// +// Mnemonic : RORX +// Supported forms : (2 forms) +// +// * RORXL imm8, r32, r32 [BMI2] +// * RORXL imm8, m32, r32 [BMI2] +// +func (self *Program) RORXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("RORXL", 3, Operands { v0, v1, v2 }) + // RORXL imm8, r32, r32 + if isImm8(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7b) + m.emit(0xf0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RORXL imm8, m32, r32 + if isImm8(v0) && isM32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x03, hcode(v[2]), addr(v[1]), 0) + m.emit(0xf0) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for RORXL") + } + return p +} + +// RORXQ performs "Rotate Right Logical Without Affecting Flags". +// +// Mnemonic : RORX +// Supported forms : (2 forms) +// +// * RORXQ imm8, r64, r64 [BMI2] +// * RORXQ imm8, m64, r64 [BMI2] +// +func (self *Program) RORXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("RORXQ", 3, Operands { v0, v1, v2 }) + // RORXQ imm8, r64, r64 + if isImm8(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xfb) + m.emit(0xf0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RORXQ imm8, m64, r64 + if isImm8(v0) && isM64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x83, hcode(v[2]), addr(v[1]), 0) + m.emit(0xf0) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for RORXQ") + } + return p +} + +// ROUNDPD performs "Round Packed Double Precision Floating-Point Values". +// +// Mnemonic : ROUNDPD +// Supported forms : (2 forms) +// +// * ROUNDPD imm8, xmm, xmm [SSE4.1] +// * ROUNDPD imm8, m128, xmm [SSE4.1] +// +func (self *Program) ROUNDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("ROUNDPD", 3, Operands { v0, v1, v2 }) + // ROUNDPD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x09) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ROUNDPD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x09) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for ROUNDPD") + } + return p +} + +// ROUNDPS performs "Round Packed Single Precision Floating-Point Values". 
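+//
+// Example (usage sketch, same assumptions as above; imm8 bits 1:0 select
+// the rounding mode, 0 = nearest, 1 = down, 2 = up, 3 = truncate):
+//
+//     p.ROUNDPS(3, XMM1, XMM0)    // roundps $3, %xmm1, %xmm0 -- truncate four floats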
+// +// Mnemonic : ROUNDPS +// Supported forms : (2 forms) +// +// * ROUNDPS imm8, xmm, xmm [SSE4.1] +// * ROUNDPS imm8, m128, xmm [SSE4.1] +// +func (self *Program) ROUNDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("ROUNDPS", 3, Operands { v0, v1, v2 }) + // ROUNDPS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x08) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ROUNDPS imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x08) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for ROUNDPS") + } + return p +} + +// ROUNDSD performs "Round Scalar Double Precision Floating-Point Values". +// +// Mnemonic : ROUNDSD +// Supported forms : (2 forms) +// +// * ROUNDSD imm8, xmm, xmm [SSE4.1] +// * ROUNDSD imm8, m64, xmm [SSE4.1] +// +func (self *Program) ROUNDSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("ROUNDSD", 3, Operands { v0, v1, v2 }) + // ROUNDSD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ROUNDSD imm8, m64, xmm + if isImm8(v0) && isM64(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0b) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for ROUNDSD") + } + return p +} + +// ROUNDSS performs "Round Scalar Single Precision Floating-Point Values". +// +// Mnemonic : ROUNDSS +// Supported forms : (2 forms) +// +// * ROUNDSS imm8, xmm, xmm [SSE4.1] +// * ROUNDSS imm8, m32, xmm [SSE4.1] +// +func (self *Program) ROUNDSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("ROUNDSS", 3, Operands { v0, v1, v2 }) + // ROUNDSS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ROUNDSS imm8, m32, xmm + if isImm8(v0) && isM32(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0a) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for ROUNDSS") + } + return p +} + +// RSQRTPS performs "Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values". 
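+//
+// Example (usage sketch, same assumptions as above; the result is an
+// approximation, good to roughly 12 bits of relative precision):
+//
+//     p.RSQRTPS(XMM2, XMM3)   // rsqrtps %xmm2, %xmm3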
+// +// Mnemonic : RSQRTPS +// Supported forms : (2 forms) +// +// * RSQRTPS xmm, xmm [SSE] +// * RSQRTPS m128, xmm [SSE] +// +func (self *Program) RSQRTPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RSQRTPS", 2, Operands { v0, v1 }) + // RSQRTPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x52) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // RSQRTPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x52) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RSQRTPS") + } + return p +} + +// RSQRTSS performs "Compute Reciprocal of Square Root of Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : RSQRTSS +// Supported forms : (2 forms) +// +// * RSQRTSS xmm, xmm [SSE] +// * RSQRTSS m32, xmm [SSE] +// +func (self *Program) RSQRTSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RSQRTSS", 2, Operands { v0, v1 }) + // RSQRTSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x52) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // RSQRTSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x52) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RSQRTSS") + } + return p +} + +// SALB performs "Arithmetic Shift Left". 
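+//
+// Example (usage sketch, same assumptions as above; SAL and SHL share the
+// same encoding, so this assembles identically to a byte shift-left):
+//
+//     p.SALB(1, AL)       // salb $1, %al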
+// +// Mnemonic : SAL +// Supported forms : (6 forms) +// +// * SALB 1, r8 +// * SALB imm8, r8 +// * SALB cl, r8 +// * SALB 1, m8 +// * SALB imm8, m8 +// * SALB cl, m8 +// +func (self *Program) SALB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SALB", 2, Operands { v0, v1 }) + // SALB 1, r8 + if isConst1(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd0) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SALB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc0) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SALB cl, r8 + if v0 == CL && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd2) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SALB 1, m8 + if isConst1(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd0) + m.mrsd(4, addr(v[1]), 1) + }) + } + // SALB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc0) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SALB cl, m8 + if v0 == CL && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd2) + m.mrsd(4, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SALB") + } + return p +} + +// SALL performs "Arithmetic Shift Left". +// +// Mnemonic : SAL +// Supported forms : (6 forms) +// +// * SALL 1, r32 +// * SALL imm8, r32 +// * SALL cl, r32 +// * SALL 1, m32 +// * SALL imm8, m32 +// * SALL cl, m32 +// +func (self *Program) SALL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SALL", 2, Operands { v0, v1 }) + // SALL 1, r32 + if isConst1(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SALL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SALL cl, r32 + if v0 == CL && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SALL 1, m32 + if isConst1(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(4, addr(v[1]), 1) + }) + } + // SALL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SALL cl, m32 + if v0 == CL && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(4, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SALL") + } + return p +} + +// SALQ performs "Arithmetic Shift Left". 
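+//
+// Example (usage sketch, same assumptions as above):
+//
+//     p.SALQ(4, RSI)      // salq $4, %rsi -- multiply RSI by 16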
+// +// Mnemonic : SAL +// Supported forms : (6 forms) +// +// * SALQ 1, r64 +// * SALQ imm8, r64 +// * SALQ cl, r64 +// * SALQ 1, m64 +// * SALQ imm8, m64 +// * SALQ cl, m64 +// +func (self *Program) SALQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SALQ", 2, Operands { v0, v1 }) + // SALQ 1, r64 + if isConst1(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd1) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SALQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc1) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SALQ cl, r64 + if v0 == CL && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd3) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SALQ 1, m64 + if isConst1(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd1) + m.mrsd(4, addr(v[1]), 1) + }) + } + // SALQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc1) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SALQ cl, m64 + if v0 == CL && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd3) + m.mrsd(4, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SALQ") + } + return p +} + +// SALW performs "Arithmetic Shift Left". +// +// Mnemonic : SAL +// Supported forms : (6 forms) +// +// * SALW 1, r16 +// * SALW imm8, r16 +// * SALW cl, r16 +// * SALW 1, m16 +// * SALW imm8, m16 +// * SALW cl, m16 +// +func (self *Program) SALW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SALW", 2, Operands { v0, v1 }) + // SALW 1, r16 + if isConst1(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SALW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SALW cl, r16 + if v0 == CL && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SALW 1, m16 + if isConst1(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(4, addr(v[1]), 1) + }) + } + // SALW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SALW cl, m16 + if v0 == CL && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(4, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SALW") + } + return p +} + +// SARB performs "Arithmetic Shift Right". 
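+//
+// Example (usage sketch, same assumptions as above):
+//
+//     p.SARB(CL, DL)      // sarb %cl, %dl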
+// +// Mnemonic : SAR +// Supported forms : (6 forms) +// +// * SARB 1, r8 +// * SARB imm8, r8 +// * SARB cl, r8 +// * SARB 1, m8 +// * SARB imm8, m8 +// * SARB cl, m8 +// +func (self *Program) SARB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SARB", 2, Operands { v0, v1 }) + // SARB 1, r8 + if isConst1(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd0) + m.emit(0xf8 | lcode(v[1])) + }) + } + // SARB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc0) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SARB cl, r8 + if v0 == CL && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd2) + m.emit(0xf8 | lcode(v[1])) + }) + } + // SARB 1, m8 + if isConst1(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd0) + m.mrsd(7, addr(v[1]), 1) + }) + } + // SARB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc0) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SARB cl, m8 + if v0 == CL && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd2) + m.mrsd(7, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SARB") + } + return p +} + +// SARL performs "Arithmetic Shift Right". +// +// Mnemonic : SAR +// Supported forms : (6 forms) +// +// * SARL 1, r32 +// * SARL imm8, r32 +// * SARL cl, r32 +// * SARL 1, m32 +// * SARL imm8, m32 +// * SARL cl, m32 +// +func (self *Program) SARL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SARL", 2, Operands { v0, v1 }) + // SARL 1, r32 + if isConst1(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xf8 | lcode(v[1])) + }) + } + // SARL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SARL cl, r32 + if v0 == CL && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xf8 | lcode(v[1])) + }) + } + // SARL 1, m32 + if isConst1(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(7, addr(v[1]), 1) + }) + } + // SARL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SARL cl, m32 + if v0 == CL && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(7, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SARL") + } + return p +} + +// SARQ performs "Arithmetic Shift Right". 
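+//
+// Example (usage sketch, same assumptions as above):
+//
+//     p.SARQ(63, RAX)     // sarq $63, %rax -- RAX becomes 0 or -1, the sign mask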
+// +// Mnemonic : SAR +// Supported forms : (6 forms) +// +// * SARQ 1, r64 +// * SARQ imm8, r64 +// * SARQ cl, r64 +// * SARQ 1, m64 +// * SARQ imm8, m64 +// * SARQ cl, m64 +// +func (self *Program) SARQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SARQ", 2, Operands { v0, v1 }) + // SARQ 1, r64 + if isConst1(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd1) + m.emit(0xf8 | lcode(v[1])) + }) + } + // SARQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc1) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SARQ cl, r64 + if v0 == CL && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd3) + m.emit(0xf8 | lcode(v[1])) + }) + } + // SARQ 1, m64 + if isConst1(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd1) + m.mrsd(7, addr(v[1]), 1) + }) + } + // SARQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc1) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SARQ cl, m64 + if v0 == CL && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd3) + m.mrsd(7, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SARQ") + } + return p +} + +// SARW performs "Arithmetic Shift Right". +// +// Mnemonic : SAR +// Supported forms : (6 forms) +// +// * SARW 1, r16 +// * SARW imm8, r16 +// * SARW cl, r16 +// * SARW 1, m16 +// * SARW imm8, m16 +// * SARW cl, m16 +// +func (self *Program) SARW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SARW", 2, Operands { v0, v1 }) + // SARW 1, r16 + if isConst1(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xf8 | lcode(v[1])) + }) + } + // SARW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SARW cl, r16 + if v0 == CL && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xf8 | lcode(v[1])) + }) + } + // SARW 1, m16 + if isConst1(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(7, addr(v[1]), 1) + }) + } + // SARW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SARW cl, m16 + if v0 == CL && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(7, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SARW") + } + return p +} + +// SARXL performs "Arithmetic Shift Right Without Affecting Flags". 
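+//
+// Example (usage sketch, same assumptions as above; the BMI2 form takes
+// the shift count from a register and does not modify the flags):
+//
+//     p.SARXL(ECX, EBX, EAX)  // sarxl %ecx, %ebx, %eax -- EAX = EBX >> ECX (arithmetic)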
+// +// Mnemonic : SARX +// Supported forms : (2 forms) +// +// * SARXL r32, r32, r32 [BMI2] +// * SARXL r32, m32, r32 [BMI2] +// +func (self *Program) SARXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SARXL", 3, Operands { v0, v1, v2 }) + // SARXL r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7a ^ (hlcode(v[0]) << 3)) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // SARXL r32, m32, r32 + if isReg32(v0) && isM32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x02, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf7) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SARXL") + } + return p +} + +// SARXQ performs "Arithmetic Shift Right Without Affecting Flags". +// +// Mnemonic : SARX +// Supported forms : (2 forms) +// +// * SARXQ r64, r64, r64 [BMI2] +// * SARXQ r64, m64, r64 [BMI2] +// +func (self *Program) SARXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SARXQ", 3, Operands { v0, v1, v2 }) + // SARXQ r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xfa ^ (hlcode(v[0]) << 3)) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // SARXQ r64, m64, r64 + if isReg64(v0) && isM64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x82, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf7) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SARXQ") + } + return p +} + +// SBBB performs "Subtract with Borrow". 
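+//
+// Example (usage sketch, same assumptions as above; subtracting zero with
+// borrow folds the carry flag into the register):
+//
+//     p.SBBB(0, AL)       // sbbb $0, %al -- AL -= CF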
+// +// Mnemonic : SBB +// Supported forms : (6 forms) +// +// * SBBB imm8, al +// * SBBB imm8, r8 +// * SBBB r8, r8 +// * SBBB m8, r8 +// * SBBB imm8, m8 +// * SBBB r8, m8 +// +func (self *Program) SBBB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SBBB", 2, Operands { v0, v1 }) + // SBBB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x1c) + m.imm1(toImmAny(v[0])) + }) + } + // SBBB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0x80) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SBBB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x18) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x1a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SBBB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x1a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // SBBB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x80) + m.mrsd(3, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SBBB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x18) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SBBB") + } + return p +} + +// SBBL performs "Subtract with Borrow". 
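+//
+// Example (usage sketch, same assumptions as above; a classic idiom that
+// materializes CF across a whole register):
+//
+//     p.SBBL(EAX, EAX)    // sbbl %eax, %eax -- EAX = CF ? -1 : 0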
+// +// Mnemonic : SBB +// Supported forms : (8 forms) +// +// * SBBL imm32, eax +// * SBBL imm8, r32 +// * SBBL imm32, r32 +// * SBBL r32, r32 +// * SBBL m32, r32 +// * SBBL imm8, m32 +// * SBBL imm32, m32 +// * SBBL r32, m32 +// +func (self *Program) SBBL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SBBL", 2, Operands { v0, v1 }) + // SBBL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x1d) + m.imm4(toImmAny(v[0])) + }) + } + // SBBL imm8, r32 + if isImm8Ext(v0, 4) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SBBL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xd8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // SBBL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x19) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x1b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SBBL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x1b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // SBBL imm8, m32 + if isImm8Ext(v0, 4) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(3, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SBBL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(3, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // SBBL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x19) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SBBL") + } + return p +} + +// SBBQ performs "Subtract with Borrow". 
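+//
+// Example (usage sketch, same assumptions as above; SBB propagates the
+// borrow of a lower limb into the next limb of a multi-word subtraction):
+//
+//     p.SBBQ(R8, R9)      // sbbq %r8, %r9 -- R9 -= R8 + CF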
+// +// Mnemonic : SBB +// Supported forms : (8 forms) +// +// * SBBQ imm32, rax +// * SBBQ imm8, r64 +// * SBBQ imm32, r64 +// * SBBQ r64, r64 +// * SBBQ m64, r64 +// * SBBQ imm8, m64 +// * SBBQ imm32, m64 +// * SBBQ r64, m64 +// +func (self *Program) SBBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SBBQ", 2, Operands { v0, v1 }) + // SBBQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x1d) + m.imm4(toImmAny(v[0])) + }) + } + // SBBQ imm8, r64 + if isImm8Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x83) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SBBQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x81) + m.emit(0xd8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // SBBQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x19) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x1b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SBBQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x1b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // SBBQ imm8, m64 + if isImm8Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x83) + m.mrsd(3, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SBBQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x81) + m.mrsd(3, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // SBBQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x19) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SBBQ") + } + return p +} + +// SBBW performs "Subtract with Borrow". 
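+//
+// Example (usage sketch, same assumptions as above):
+//
+//     p.SBBW(10, AX)      // sbbw $10, %ax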
+// +// Mnemonic : SBB +// Supported forms : (8 forms) +// +// * SBBW imm16, ax +// * SBBW imm8, r16 +// * SBBW imm16, r16 +// * SBBW r16, r16 +// * SBBW m16, r16 +// * SBBW imm8, m16 +// * SBBW imm16, m16 +// * SBBW r16, m16 +// +func (self *Program) SBBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SBBW", 2, Operands { v0, v1 }) + // SBBW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x1d) + m.imm2(toImmAny(v[0])) + }) + } + // SBBW imm8, r16 + if isImm8Ext(v0, 2) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SBBW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xd8 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // SBBW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x19) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x1b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SBBW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x1b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // SBBW imm8, m16 + if isImm8Ext(v0, 2) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(3, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SBBW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(3, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // SBBW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x19) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SBBW") + } + return p +} + +// SETA performs "Set byte if above (CF == 0 and ZF == 0)". +// +// Mnemonic : SETA +// Supported forms : (2 forms) +// +// * SETA r8 +// * SETA m8 +// +func (self *Program) SETA(v0 interface{}) *Instruction { + p := self.alloc("SETA", 1, Operands { v0 }) + // SETA r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x97) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETA m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x97) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETA") + } + return p +} + +// SETAE performs "Set byte if above or equal (CF == 0)". 
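+//
+// Example (usage sketch, same assumptions as above; SETAE and SETNB are
+// the same instruction, encoded as 0F 93):
+//
+//     p.SETAE(AL)         // setae %al -- AL = 1 if CF == 0 after a compare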
+// +// Mnemonic : SETAE +// Supported forms : (2 forms) +// +// * SETAE r8 +// * SETAE m8 +// +func (self *Program) SETAE(v0 interface{}) *Instruction { + p := self.alloc("SETAE", 1, Operands { v0 }) + // SETAE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x93) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETAE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x93) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETAE") + } + return p +} + +// SETB performs "Set byte if below (CF == 1)". +// +// Mnemonic : SETB +// Supported forms : (2 forms) +// +// * SETB r8 +// * SETB m8 +// +func (self *Program) SETB(v0 interface{}) *Instruction { + p := self.alloc("SETB", 1, Operands { v0 }) + // SETB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x92) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x92) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETB") + } + return p +} + +// SETBE performs "Set byte if below or equal (CF == 1 or ZF == 1)". +// +// Mnemonic : SETBE +// Supported forms : (2 forms) +// +// * SETBE r8 +// * SETBE m8 +// +func (self *Program) SETBE(v0 interface{}) *Instruction { + p := self.alloc("SETBE", 1, Operands { v0 }) + // SETBE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x96) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETBE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x96) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETBE") + } + return p +} + +// SETC performs "Set byte if carry (CF == 1)". +// +// Mnemonic : SETC +// Supported forms : (2 forms) +// +// * SETC r8 +// * SETC m8 +// +func (self *Program) SETC(v0 interface{}) *Instruction { + p := self.alloc("SETC", 1, Operands { v0 }) + // SETC r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x92) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETC m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x92) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETC") + } + return p +} + +// SETE performs "Set byte if equal (ZF == 1)". 
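+//
+// Example (usage sketch, same assumptions as above):
+//
+//     p.SETE(CL)          // sete %cl -- CL = 1 if ZF == 1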
+// +// Mnemonic : SETE +// Supported forms : (2 forms) +// +// * SETE r8 +// * SETE m8 +// +func (self *Program) SETE(v0 interface{}) *Instruction { + p := self.alloc("SETE", 1, Operands { v0 }) + // SETE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x94) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x94) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETE") + } + return p +} + +// SETG performs "Set byte if greater (ZF == 0 and SF == OF)". +// +// Mnemonic : SETG +// Supported forms : (2 forms) +// +// * SETG r8 +// * SETG m8 +// +func (self *Program) SETG(v0 interface{}) *Instruction { + p := self.alloc("SETG", 1, Operands { v0 }) + // SETG r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETG m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9f) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETG") + } + return p +} + +// SETGE performs "Set byte if greater or equal (SF == OF)". +// +// Mnemonic : SETGE +// Supported forms : (2 forms) +// +// * SETGE r8 +// * SETGE m8 +// +func (self *Program) SETGE(v0 interface{}) *Instruction { + p := self.alloc("SETGE", 1, Operands { v0 }) + // SETGE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9d) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETGE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9d) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETGE") + } + return p +} + +// SETL performs "Set byte if less (SF != OF)". +// +// Mnemonic : SETL +// Supported forms : (2 forms) +// +// * SETL r8 +// * SETL m8 +// +func (self *Program) SETL(v0 interface{}) *Instruction { + p := self.alloc("SETL", 1, Operands { v0 }) + // SETL r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETL m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9c) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETL") + } + return p +} + +// SETLE performs "Set byte if less or equal (ZF == 1 or SF != OF)". 
+// +// Mnemonic : SETLE +// Supported forms : (2 forms) +// +// * SETLE r8 +// * SETLE m8 +// +func (self *Program) SETLE(v0 interface{}) *Instruction { + p := self.alloc("SETLE", 1, Operands { v0 }) + // SETLE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETLE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9e) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETLE") + } + return p +} + +// SETNA performs "Set byte if not above (CF == 1 or ZF == 1)". +// +// Mnemonic : SETNA +// Supported forms : (2 forms) +// +// * SETNA r8 +// * SETNA m8 +// +func (self *Program) SETNA(v0 interface{}) *Instruction { + p := self.alloc("SETNA", 1, Operands { v0 }) + // SETNA r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x96) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNA m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x96) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNA") + } + return p +} + +// SETNAE performs "Set byte if not above or equal (CF == 1)". +// +// Mnemonic : SETNAE +// Supported forms : (2 forms) +// +// * SETNAE r8 +// * SETNAE m8 +// +func (self *Program) SETNAE(v0 interface{}) *Instruction { + p := self.alloc("SETNAE", 1, Operands { v0 }) + // SETNAE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x92) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNAE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x92) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNAE") + } + return p +} + +// SETNB performs "Set byte if not below (CF == 0)". +// +// Mnemonic : SETNB +// Supported forms : (2 forms) +// +// * SETNB r8 +// * SETNB m8 +// +func (self *Program) SETNB(v0 interface{}) *Instruction { + p := self.alloc("SETNB", 1, Operands { v0 }) + // SETNB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x93) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x93) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNB") + } + return p +} + +// SETNBE performs "Set byte if not below or equal (CF == 0 and ZF == 0)". 
+// +// Mnemonic : SETNBE +// Supported forms : (2 forms) +// +// * SETNBE r8 +// * SETNBE m8 +// +func (self *Program) SETNBE(v0 interface{}) *Instruction { + p := self.alloc("SETNBE", 1, Operands { v0 }) + // SETNBE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x97) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNBE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x97) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNBE") + } + return p +} + +// SETNC performs "Set byte if not carry (CF == 0)". +// +// Mnemonic : SETNC +// Supported forms : (2 forms) +// +// * SETNC r8 +// * SETNC m8 +// +func (self *Program) SETNC(v0 interface{}) *Instruction { + p := self.alloc("SETNC", 1, Operands { v0 }) + // SETNC r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x93) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNC m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x93) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNC") + } + return p +} + +// SETNE performs "Set byte if not equal (ZF == 0)". +// +// Mnemonic : SETNE +// Supported forms : (2 forms) +// +// * SETNE r8 +// * SETNE m8 +// +func (self *Program) SETNE(v0 interface{}) *Instruction { + p := self.alloc("SETNE", 1, Operands { v0 }) + // SETNE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x95) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x95) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNE") + } + return p +} + +// SETNG performs "Set byte if not greater (ZF == 1 or SF != OF)". +// +// Mnemonic : SETNG +// Supported forms : (2 forms) +// +// * SETNG r8 +// * SETNG m8 +// +func (self *Program) SETNG(v0 interface{}) *Instruction { + p := self.alloc("SETNG", 1, Operands { v0 }) + // SETNG r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNG m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9e) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNG") + } + return p +} + +// SETNGE performs "Set byte if not greater or equal (SF != OF)". 
+// +// Mnemonic : SETNGE +// Supported forms : (2 forms) +// +// * SETNGE r8 +// * SETNGE m8 +// +func (self *Program) SETNGE(v0 interface{}) *Instruction { + p := self.alloc("SETNGE", 1, Operands { v0 }) + // SETNGE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNGE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9c) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNGE") + } + return p +} + +// SETNL performs "Set byte if not less (SF == OF)". +// +// Mnemonic : SETNL +// Supported forms : (2 forms) +// +// * SETNL r8 +// * SETNL m8 +// +func (self *Program) SETNL(v0 interface{}) *Instruction { + p := self.alloc("SETNL", 1, Operands { v0 }) + // SETNL r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9d) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNL m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9d) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNL") + } + return p +} + +// SETNLE performs "Set byte if not less or equal (ZF == 0 and SF == OF)". +// +// Mnemonic : SETNLE +// Supported forms : (2 forms) +// +// * SETNLE r8 +// * SETNLE m8 +// +func (self *Program) SETNLE(v0 interface{}) *Instruction { + p := self.alloc("SETNLE", 1, Operands { v0 }) + // SETNLE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNLE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9f) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNLE") + } + return p +} + +// SETNO performs "Set byte if not overflow (OF == 0)". +// +// Mnemonic : SETNO +// Supported forms : (2 forms) +// +// * SETNO r8 +// * SETNO m8 +// +func (self *Program) SETNO(v0 interface{}) *Instruction { + p := self.alloc("SETNO", 1, Operands { v0 }) + // SETNO r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x91) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNO m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x91) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNO") + } + return p +} + +// SETNP performs "Set byte if not parity (PF == 0)". 
+// +// Mnemonic : SETNP +// Supported forms : (2 forms) +// +// * SETNP r8 +// * SETNP m8 +// +func (self *Program) SETNP(v0 interface{}) *Instruction { + p := self.alloc("SETNP", 1, Operands { v0 }) + // SETNP r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNP m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9b) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNP") + } + return p +} + +// SETNS performs "Set byte if not sign (SF == 0)". +// +// Mnemonic : SETNS +// Supported forms : (2 forms) +// +// * SETNS r8 +// * SETNS m8 +// +func (self *Program) SETNS(v0 interface{}) *Instruction { + p := self.alloc("SETNS", 1, Operands { v0 }) + // SETNS r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x99) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNS m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x99) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNS") + } + return p +} + +// SETNZ performs "Set byte if not zero (ZF == 0)". +// +// Mnemonic : SETNZ +// Supported forms : (2 forms) +// +// * SETNZ r8 +// * SETNZ m8 +// +func (self *Program) SETNZ(v0 interface{}) *Instruction { + p := self.alloc("SETNZ", 1, Operands { v0 }) + // SETNZ r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x95) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNZ m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x95) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNZ") + } + return p +} + +// SETO performs "Set byte if overflow (OF == 1)". +// +// Mnemonic : SETO +// Supported forms : (2 forms) +// +// * SETO r8 +// * SETO m8 +// +func (self *Program) SETO(v0 interface{}) *Instruction { + p := self.alloc("SETO", 1, Operands { v0 }) + // SETO r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x90) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETO m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x90) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETO") + } + return p +} + +// SETP performs "Set byte if parity (PF == 1)". 
+// +// Mnemonic : SETP +// Supported forms : (2 forms) +// +// * SETP r8 +// * SETP m8 +// +func (self *Program) SETP(v0 interface{}) *Instruction { + p := self.alloc("SETP", 1, Operands { v0 }) + // SETP r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETP m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9a) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETP") + } + return p +} + +// SETPE performs "Set byte if parity even (PF == 1)". +// +// Mnemonic : SETPE +// Supported forms : (2 forms) +// +// * SETPE r8 +// * SETPE m8 +// +func (self *Program) SETPE(v0 interface{}) *Instruction { + p := self.alloc("SETPE", 1, Operands { v0 }) + // SETPE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETPE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9a) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETPE") + } + return p +} + +// SETPO performs "Set byte if parity odd (PF == 0)". +// +// Mnemonic : SETPO +// Supported forms : (2 forms) +// +// * SETPO r8 +// * SETPO m8 +// +func (self *Program) SETPO(v0 interface{}) *Instruction { + p := self.alloc("SETPO", 1, Operands { v0 }) + // SETPO r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETPO m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9b) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETPO") + } + return p +} + +// SETS performs "Set byte if sign (SF == 1)". +// +// Mnemonic : SETS +// Supported forms : (2 forms) +// +// * SETS r8 +// * SETS m8 +// +func (self *Program) SETS(v0 interface{}) *Instruction { + p := self.alloc("SETS", 1, Operands { v0 }) + // SETS r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x98) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETS m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x98) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETS") + } + return p +} + +// SETZ performs "Set byte if zero (ZF == 1)". 
+// +// Mnemonic : SETZ +// Supported forms : (2 forms) +// +// * SETZ r8 +// * SETZ m8 +// +func (self *Program) SETZ(v0 interface{}) *Instruction { + p := self.alloc("SETZ", 1, Operands { v0 }) + // SETZ r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x94) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETZ m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x94) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETZ") + } + return p +} + +// SFENCE performs "Store Fence". +// +// Mnemonic : SFENCE +// Supported forms : (1 form) +// +// * SFENCE [MMX+] +// +func (self *Program) SFENCE() *Instruction { + p := self.alloc("SFENCE", 0, Operands { }) + // SFENCE + self.require(ISA_MMX_PLUS) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0xae) + m.emit(0xf8) + }) + return p +} + +// SHA1MSG1 performs "Perform an Intermediate Calculation for the Next Four SHA1 Message Doublewords". +// +// Mnemonic : SHA1MSG1 +// Supported forms : (2 forms) +// +// * SHA1MSG1 xmm, xmm [SHA] +// * SHA1MSG1 m128, xmm [SHA] +// +func (self *Program) SHA1MSG1(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHA1MSG1", 2, Operands { v0, v1 }) + // SHA1MSG1 xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xc9) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SHA1MSG1 m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xc9) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHA1MSG1") + } + return p +} + +// SHA1MSG2 performs "Perform a Final Calculation for the Next Four SHA1 Message Doublewords". +// +// Mnemonic : SHA1MSG2 +// Supported forms : (2 forms) +// +// * SHA1MSG2 xmm, xmm [SHA] +// * SHA1MSG2 m128, xmm [SHA] +// +func (self *Program) SHA1MSG2(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHA1MSG2", 2, Operands { v0, v1 }) + // SHA1MSG2 xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xca) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SHA1MSG2 m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xca) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHA1MSG2") + } + return p +} + +// SHA1NEXTE performs "Calculate SHA1 State Variable E after Four Rounds". 
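+//
+// Example (a minimal usage sketch, assuming a *Program p and that XMM1 is
+// defined alongside the package's XMM0 constant):
+//
+//     p.SHA1NEXTE(XMM1, XMM0)    // combine the E value derived from XMM0 with the scheduled dwords in XMM1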
+// +// Mnemonic : SHA1NEXTE +// Supported forms : (2 forms) +// +// * SHA1NEXTE xmm, xmm [SHA] +// * SHA1NEXTE m128, xmm [SHA] +// +func (self *Program) SHA1NEXTE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHA1NEXTE", 2, Operands { v0, v1 }) + // SHA1NEXTE xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xc8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SHA1NEXTE m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xc8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHA1NEXTE") + } + return p +} + +// SHA1RNDS4 performs "Perform Four Rounds of SHA1 Operation". +// +// Mnemonic : SHA1RNDS4 +// Supported forms : (2 forms) +// +// * SHA1RNDS4 imm8, xmm, xmm [SHA] +// * SHA1RNDS4 imm8, m128, xmm [SHA] +// +func (self *Program) SHA1RNDS4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHA1RNDS4", 3, Operands { v0, v1, v2 }) + // SHA1RNDS4 imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0xcc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHA1RNDS4 imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0xcc) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for SHA1RNDS4") + } + return p +} + +// SHA256MSG1 performs "Perform an Intermediate Calculation for the Next Four SHA256 Message Doublewords". +// +// Mnemonic : SHA256MSG1 +// Supported forms : (2 forms) +// +// * SHA256MSG1 xmm, xmm [SHA] +// * SHA256MSG1 m128, xmm [SHA] +// +func (self *Program) SHA256MSG1(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHA256MSG1", 2, Operands { v0, v1 }) + // SHA256MSG1 xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xcc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SHA256MSG1 m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xcc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHA256MSG1") + } + return p +} + +// SHA256MSG2 performs "Perform a Final Calculation for the Next Four SHA256 Message Doublewords". 
+// +// Mnemonic : SHA256MSG2 +// Supported forms : (2 forms) +// +// * SHA256MSG2 xmm, xmm [SHA] +// * SHA256MSG2 m128, xmm [SHA] +// +func (self *Program) SHA256MSG2(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHA256MSG2", 2, Operands { v0, v1 }) + // SHA256MSG2 xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xcd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SHA256MSG2 m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xcd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHA256MSG2") + } + return p +} + +// SHA256RNDS2 performs "Perform Two Rounds of SHA256 Operation". +// +// Mnemonic : SHA256RNDS2 +// Supported forms : (2 forms) +// +// * SHA256RNDS2 xmm0, xmm, xmm [SHA] +// * SHA256RNDS2 xmm0, m128, xmm [SHA] +// +func (self *Program) SHA256RNDS2(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHA256RNDS2", 3, Operands { v0, v1, v2 }) + // SHA256RNDS2 xmm0, xmm, xmm + if v0 == XMM0 && isXMM(v1) && isXMM(v2) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xcb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // SHA256RNDS2 xmm0, m128, xmm + if v0 == XMM0 && isM128(v1) && isXMM(v2) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xcb) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHA256RNDS2") + } + return p +} + +// SHLB performs "Logical Shift Left". 
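+//
+// Example (a minimal usage sketch, assuming a *Program p; AL and CL are the
+// package's 8-bit register constants, and plain Go integers are accepted as
+// immediate operands):
+//
+//     p.SHLB(1, AL)     // AL <<= 1          (D0 /4 short form)
+//     p.SHLB(3, AL)     // AL <<= 3          (C0 /4 ib form)
+//     p.SHLB(CL, AL)    // AL <<= CL & 0x1f  (D2 /4 form)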
+// +// Mnemonic : SHL +// Supported forms : (6 forms) +// +// * SHLB 1, r8 +// * SHLB imm8, r8 +// * SHLB cl, r8 +// * SHLB 1, m8 +// * SHLB imm8, m8 +// * SHLB cl, m8 +// +func (self *Program) SHLB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHLB", 2, Operands { v0, v1 }) + // SHLB 1, r8 + if isConst1(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd0) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SHLB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc0) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHLB cl, r8 + if v0 == CL && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd2) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SHLB 1, m8 + if isConst1(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd0) + m.mrsd(4, addr(v[1]), 1) + }) + } + // SHLB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc0) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHLB cl, m8 + if v0 == CL && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd2) + m.mrsd(4, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLB") + } + return p +} + +// SHLDL performs "Integer Double Precision Shift Left". +// +// Mnemonic : SHLD +// Supported forms : (4 forms) +// +// * SHLDL imm8, r32, r32 +// * SHLDL cl, r32, r32 +// * SHLDL imm8, r32, m32 +// * SHLDL cl, r32, m32 +// +func (self *Program) SHLDL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHLDL", 3, Operands { v0, v1, v2 }) + // SHLDL imm8, r32, r32 + if isImm8(v0) && isReg32(v1) && isReg32(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0xa4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // SHLDL cl, r32, r32 + if v0 == CL && isReg32(v1) && isReg32(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0xa5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + }) + } + // SHLDL imm8, r32, m32 + if isImm8(v0) && isReg32(v1) && isM32(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0xa4) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHLDL cl, r32, m32 + if v0 == CL && isReg32(v1) && isM32(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0xa5) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLDL") + } + return p +} + +// SHLDQ performs "Integer Double Precision Shift Left". 
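+//
+// Example (a minimal usage sketch, assuming a *Program p and the usual 64-bit
+// register constants such as RAX and RBX):
+//
+//     p.SHLDQ(8, RAX, RBX)    // RBX = RBX<<8 | RAX>>56 (vacated low bits
+//                             // are filled from the high bits of RAX)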
+// +// Mnemonic : SHLD +// Supported forms : (4 forms) +// +// * SHLDQ imm8, r64, r64 +// * SHLDQ cl, r64, r64 +// * SHLDQ imm8, r64, m64 +// * SHLDQ cl, r64, m64 +// +func (self *Program) SHLDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHLDQ", 3, Operands { v0, v1, v2 }) + // SHLDQ imm8, r64, r64 + if isImm8(v0) && isReg64(v1) && isReg64(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2])) + m.emit(0x0f) + m.emit(0xa4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // SHLDQ cl, r64, r64 + if v0 == CL && isReg64(v1) && isReg64(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2])) + m.emit(0x0f) + m.emit(0xa5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + }) + } + // SHLDQ imm8, r64, m64 + if isImm8(v0) && isReg64(v1) && isM64(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[2])) + m.emit(0x0f) + m.emit(0xa4) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHLDQ cl, r64, m64 + if v0 == CL && isReg64(v1) && isM64(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[2])) + m.emit(0x0f) + m.emit(0xa5) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLDQ") + } + return p +} + +// SHLDW performs "Integer Double Precision Shift Left". +// +// Mnemonic : SHLD +// Supported forms : (4 forms) +// +// * SHLDW imm8, r16, r16 +// * SHLDW cl, r16, r16 +// * SHLDW imm8, r16, m16 +// * SHLDW cl, r16, m16 +// +func (self *Program) SHLDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHLDW", 3, Operands { v0, v1, v2 }) + // SHLDW imm8, r16, r16 + if isImm8(v0) && isReg16(v1) && isReg16(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0xa4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // SHLDW cl, r16, r16 + if v0 == CL && isReg16(v1) && isReg16(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0xa5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + }) + } + // SHLDW imm8, r16, m16 + if isImm8(v0) && isReg16(v1) && isM16(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0xa4) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHLDW cl, r16, m16 + if v0 == CL && isReg16(v1) && isM16(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0xa5) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLDW") + } + return p +} + +// SHLL performs "Logical Shift Left". 
+// +// Mnemonic : SHL +// Supported forms : (6 forms) +// +// * SHLL 1, r32 +// * SHLL imm8, r32 +// * SHLL cl, r32 +// * SHLL 1, m32 +// * SHLL imm8, m32 +// * SHLL cl, m32 +// +func (self *Program) SHLL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHLL", 2, Operands { v0, v1 }) + // SHLL 1, r32 + if isConst1(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SHLL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHLL cl, r32 + if v0 == CL && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SHLL 1, m32 + if isConst1(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(4, addr(v[1]), 1) + }) + } + // SHLL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHLL cl, m32 + if v0 == CL && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(4, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLL") + } + return p +} + +// SHLQ performs "Logical Shift Left". +// +// Mnemonic : SHL +// Supported forms : (6 forms) +// +// * SHLQ 1, r64 +// * SHLQ imm8, r64 +// * SHLQ cl, r64 +// * SHLQ 1, m64 +// * SHLQ imm8, m64 +// * SHLQ cl, m64 +// +func (self *Program) SHLQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHLQ", 2, Operands { v0, v1 }) + // SHLQ 1, r64 + if isConst1(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd1) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SHLQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc1) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHLQ cl, r64 + if v0 == CL && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd3) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SHLQ 1, m64 + if isConst1(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd1) + m.mrsd(4, addr(v[1]), 1) + }) + } + // SHLQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc1) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHLQ cl, m64 + if v0 == CL && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd3) + m.mrsd(4, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLQ") + } + return p +} + +// SHLW performs "Logical Shift Left". 
+// +// Mnemonic : SHL +// Supported forms : (6 forms) +// +// * SHLW 1, r16 +// * SHLW imm8, r16 +// * SHLW cl, r16 +// * SHLW 1, m16 +// * SHLW imm8, m16 +// * SHLW cl, m16 +// +func (self *Program) SHLW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHLW", 2, Operands { v0, v1 }) + // SHLW 1, r16 + if isConst1(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SHLW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHLW cl, r16 + if v0 == CL && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SHLW 1, m16 + if isConst1(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(4, addr(v[1]), 1) + }) + } + // SHLW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHLW cl, m16 + if v0 == CL && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(4, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLW") + } + return p +} + +// SHLXL performs "Logical Shift Left Without Affecting Flags". +// +// Mnemonic : SHLX +// Supported forms : (2 forms) +// +// * SHLXL r32, r32, r32 [BMI2] +// * SHLXL r32, m32, r32 [BMI2] +// +func (self *Program) SHLXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHLXL", 3, Operands { v0, v1, v2 }) + // SHLXL r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[0]) << 3)) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // SHLXL r32, m32, r32 + if isReg32(v0) && isM32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf7) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLXL") + } + return p +} + +// SHLXQ performs "Logical Shift Left Without Affecting Flags". 
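+//
+// Example (a minimal usage sketch, assuming a *Program p and 64-bit register
+// constants RAX, RBX and RCX; unlike SHLQ, the count may come from any
+// general register and EFLAGS is left untouched):
+//
+//     p.SHLXQ(RCX, RAX, RBX)    // RBX = RAX << (RCX & 0x3f), flags unchanged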
+// +// Mnemonic : SHLX +// Supported forms : (2 forms) +// +// * SHLXQ r64, r64, r64 [BMI2] +// * SHLXQ r64, m64, r64 [BMI2] +// +func (self *Program) SHLXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHLXQ", 3, Operands { v0, v1, v2 }) + // SHLXQ r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xf9 ^ (hlcode(v[0]) << 3)) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // SHLXQ r64, m64, r64 + if isReg64(v0) && isM64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf7) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLXQ") + } + return p +} + +// SHRB performs "Logical Shift Right". +// +// Mnemonic : SHR +// Supported forms : (6 forms) +// +// * SHRB 1, r8 +// * SHRB imm8, r8 +// * SHRB cl, r8 +// * SHRB 1, m8 +// * SHRB imm8, m8 +// * SHRB cl, m8 +// +func (self *Program) SHRB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHRB", 2, Operands { v0, v1 }) + // SHRB 1, r8 + if isConst1(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd0) + m.emit(0xe8 | lcode(v[1])) + }) + } + // SHRB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc0) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHRB cl, r8 + if v0 == CL && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd2) + m.emit(0xe8 | lcode(v[1])) + }) + } + // SHRB 1, m8 + if isConst1(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd0) + m.mrsd(5, addr(v[1]), 1) + }) + } + // SHRB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc0) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHRB cl, m8 + if v0 == CL && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd2) + m.mrsd(5, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRB") + } + return p +} + +// SHRDL performs "Integer Double Precision Shift Right". 
+// +// Mnemonic : SHRD +// Supported forms : (4 forms) +// +// * SHRDL imm8, r32, r32 +// * SHRDL cl, r32, r32 +// * SHRDL imm8, r32, m32 +// * SHRDL cl, r32, m32 +// +func (self *Program) SHRDL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHRDL", 3, Operands { v0, v1, v2 }) + // SHRDL imm8, r32, r32 + if isImm8(v0) && isReg32(v1) && isReg32(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0xac) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // SHRDL cl, r32, r32 + if v0 == CL && isReg32(v1) && isReg32(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0xad) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + }) + } + // SHRDL imm8, r32, m32 + if isImm8(v0) && isReg32(v1) && isM32(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0xac) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHRDL cl, r32, m32 + if v0 == CL && isReg32(v1) && isM32(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0xad) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRDL") + } + return p +} + +// SHRDQ performs "Integer Double Precision Shift Right". +// +// Mnemonic : SHRD +// Supported forms : (4 forms) +// +// * SHRDQ imm8, r64, r64 +// * SHRDQ cl, r64, r64 +// * SHRDQ imm8, r64, m64 +// * SHRDQ cl, r64, m64 +// +func (self *Program) SHRDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHRDQ", 3, Operands { v0, v1, v2 }) + // SHRDQ imm8, r64, r64 + if isImm8(v0) && isReg64(v1) && isReg64(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2])) + m.emit(0x0f) + m.emit(0xac) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // SHRDQ cl, r64, r64 + if v0 == CL && isReg64(v1) && isReg64(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2])) + m.emit(0x0f) + m.emit(0xad) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + }) + } + // SHRDQ imm8, r64, m64 + if isImm8(v0) && isReg64(v1) && isM64(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[2])) + m.emit(0x0f) + m.emit(0xac) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHRDQ cl, r64, m64 + if v0 == CL && isReg64(v1) && isM64(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[2])) + m.emit(0x0f) + m.emit(0xad) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRDQ") + } + return p +} + +// SHRDW performs "Integer Double Precision Shift Right". 
+// +// Mnemonic : SHRD +// Supported forms : (4 forms) +// +// * SHRDW imm8, r16, r16 +// * SHRDW cl, r16, r16 +// * SHRDW imm8, r16, m16 +// * SHRDW cl, r16, m16 +// +func (self *Program) SHRDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHRDW", 3, Operands { v0, v1, v2 }) + // SHRDW imm8, r16, r16 + if isImm8(v0) && isReg16(v1) && isReg16(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0xac) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // SHRDW cl, r16, r16 + if v0 == CL && isReg16(v1) && isReg16(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0xad) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + }) + } + // SHRDW imm8, r16, m16 + if isImm8(v0) && isReg16(v1) && isM16(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0xac) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHRDW cl, r16, m16 + if v0 == CL && isReg16(v1) && isM16(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0xad) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRDW") + } + return p +} + +// SHRL performs "Logical Shift Right". +// +// Mnemonic : SHR +// Supported forms : (6 forms) +// +// * SHRL 1, r32 +// * SHRL imm8, r32 +// * SHRL cl, r32 +// * SHRL 1, m32 +// * SHRL imm8, m32 +// * SHRL cl, m32 +// +func (self *Program) SHRL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHRL", 2, Operands { v0, v1 }) + // SHRL 1, r32 + if isConst1(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xe8 | lcode(v[1])) + }) + } + // SHRL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHRL cl, r32 + if v0 == CL && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xe8 | lcode(v[1])) + }) + } + // SHRL 1, m32 + if isConst1(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(5, addr(v[1]), 1) + }) + } + // SHRL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHRL cl, m32 + if v0 == CL && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(5, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRL") + } + return p +} + +// SHRQ performs "Logical Shift Right". 
+// +// Mnemonic : SHR +// Supported forms : (6 forms) +// +// * SHRQ 1, r64 +// * SHRQ imm8, r64 +// * SHRQ cl, r64 +// * SHRQ 1, m64 +// * SHRQ imm8, m64 +// * SHRQ cl, m64 +// +func (self *Program) SHRQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHRQ", 2, Operands { v0, v1 }) + // SHRQ 1, r64 + if isConst1(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd1) + m.emit(0xe8 | lcode(v[1])) + }) + } + // SHRQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc1) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHRQ cl, r64 + if v0 == CL && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd3) + m.emit(0xe8 | lcode(v[1])) + }) + } + // SHRQ 1, m64 + if isConst1(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd1) + m.mrsd(5, addr(v[1]), 1) + }) + } + // SHRQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc1) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHRQ cl, m64 + if v0 == CL && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd3) + m.mrsd(5, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRQ") + } + return p +} + +// SHRW performs "Logical Shift Right". +// +// Mnemonic : SHR +// Supported forms : (6 forms) +// +// * SHRW 1, r16 +// * SHRW imm8, r16 +// * SHRW cl, r16 +// * SHRW 1, m16 +// * SHRW imm8, m16 +// * SHRW cl, m16 +// +func (self *Program) SHRW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHRW", 2, Operands { v0, v1 }) + // SHRW 1, r16 + if isConst1(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xe8 | lcode(v[1])) + }) + } + // SHRW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHRW cl, r16 + if v0 == CL && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xe8 | lcode(v[1])) + }) + } + // SHRW 1, m16 + if isConst1(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(5, addr(v[1]), 1) + }) + } + // SHRW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHRW cl, m16 + if v0 == CL && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(5, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRW") + } + return p +} + +// SHRXL performs "Logical Shift Right Without Affecting Flags". 
+// +// Mnemonic : SHRX +// Supported forms : (2 forms) +// +// * SHRXL r32, r32, r32 [BMI2] +// * SHRXL r32, m32, r32 [BMI2] +// +func (self *Program) SHRXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHRXL", 3, Operands { v0, v1, v2 }) + // SHRXL r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7b ^ (hlcode(v[0]) << 3)) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // SHRXL r32, m32, r32 + if isReg32(v0) && isM32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x03, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf7) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRXL") + } + return p +} + +// SHRXQ performs "Logical Shift Right Without Affecting Flags". +// +// Mnemonic : SHRX +// Supported forms : (2 forms) +// +// * SHRXQ r64, r64, r64 [BMI2] +// * SHRXQ r64, m64, r64 [BMI2] +// +func (self *Program) SHRXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHRXQ", 3, Operands { v0, v1, v2 }) + // SHRXQ r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xfb ^ (hlcode(v[0]) << 3)) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // SHRXQ r64, m64, r64 + if isReg64(v0) && isM64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x83, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf7) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRXQ") + } + return p +} + +// SHUFPD performs "Shuffle Packed Double-Precision Floating-Point Values". +// +// Mnemonic : SHUFPD +// Supported forms : (2 forms) +// +// * SHUFPD imm8, xmm, xmm [SSE2] +// * SHUFPD imm8, m128, xmm [SSE2] +// +func (self *Program) SHUFPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHUFPD", 3, Operands { v0, v1, v2 }) + // SHUFPD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHUFPD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc6) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for SHUFPD") + } + return p +} + +// SHUFPS performs "Shuffle Packed Single-Precision Floating-Point Values". 
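+//
+// Example (a minimal usage sketch, assuming a *Program p; the imm8 packs four
+// 2-bit lane selectors, with the low pair of result lanes taken from the
+// destination and the high pair from the source):
+//
+//     p.SHUFPS(0x1b, XMM0, XMM0)    // 0b00011011 reverses the four lanes of XMM0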
+// +// Mnemonic : SHUFPS +// Supported forms : (2 forms) +// +// * SHUFPS imm8, xmm, xmm [SSE] +// * SHUFPS imm8, m128, xmm [SSE] +// +func (self *Program) SHUFPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHUFPS", 3, Operands { v0, v1, v2 }) + // SHUFPS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHUFPS imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc6) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for SHUFPS") + } + return p +} + +// SQRTPD performs "Compute Square Roots of Packed Double-Precision Floating-Point Values". +// +// Mnemonic : SQRTPD +// Supported forms : (2 forms) +// +// * SQRTPD xmm, xmm [SSE2] +// * SQRTPD m128, xmm [SSE2] +// +func (self *Program) SQRTPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SQRTPD", 2, Operands { v0, v1 }) + // SQRTPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SQRTPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SQRTPD") + } + return p +} + +// SQRTPS performs "Compute Square Roots of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : SQRTPS +// Supported forms : (2 forms) +// +// * SQRTPS xmm, xmm [SSE] +// * SQRTPS m128, xmm [SSE] +// +func (self *Program) SQRTPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SQRTPS", 2, Operands { v0, v1 }) + // SQRTPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SQRTPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SQRTPS") + } + return p +} + +// SQRTSD performs "Compute Square Root of Scalar Double-Precision Floating-Point Value". 
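+//
+// Example (a minimal usage sketch, assuming a *Program p):
+//
+//     p.SQRTSD(XMM0, XMM0)    // XMM0[63:0] = sqrt(XMM0[63:0]); upper half unchanged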
+// +// Mnemonic : SQRTSD +// Supported forms : (2 forms) +// +// * SQRTSD xmm, xmm [SSE2] +// * SQRTSD m64, xmm [SSE2] +// +func (self *Program) SQRTSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SQRTSD", 2, Operands { v0, v1 }) + // SQRTSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SQRTSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SQRTSD") + } + return p +} + +// SQRTSS performs "Compute Square Root of Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : SQRTSS +// Supported forms : (2 forms) +// +// * SQRTSS xmm, xmm [SSE] +// * SQRTSS m32, xmm [SSE] +// +func (self *Program) SQRTSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SQRTSS", 2, Operands { v0, v1 }) + // SQRTSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SQRTSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SQRTSS") + } + return p +} + +// STC performs "Set Carry Flag". +// +// Mnemonic : STC +// Supported forms : (1 form) +// +// * STC +// +func (self *Program) STC() *Instruction { + p := self.alloc("STC", 0, Operands { }) + // STC + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf9) + }) + return p +} + +// STD performs "Set Direction Flag". +// +// Mnemonic : STD +// Supported forms : (1 form) +// +// * STD +// +func (self *Program) STD() *Instruction { + p := self.alloc("STD", 0, Operands { }) + // STD + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xfd) + }) + return p +} + +// STMXCSR performs "Store MXCSR Register State". +// +// Mnemonic : STMXCSR +// Supported forms : (1 form) +// +// * STMXCSR m32 [SSE] +// +func (self *Program) STMXCSR(v0 interface{}) *Instruction { + p := self.alloc("STMXCSR", 1, Operands { v0 }) + // STMXCSR m32 + if isM32(v0) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0xae) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for STMXCSR") + } + return p +} + +// SUBB performs "Subtract". 
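+//
+// A usage sketch (assuming a *Program p and the package's 8-bit register
+// constants). The dedicated AL opcode (0x2C) is selected automatically when
+// the destination is AL:
+//
+//     p.SUBB(1, AL)     // AL -= 1
+//     p.SUBB(CL, BL)    // BL -= CL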
+// +// Mnemonic : SUB +// Supported forms : (6 forms) +// +// * SUBB imm8, al +// * SUBB imm8, r8 +// * SUBB r8, r8 +// * SUBB m8, r8 +// * SUBB imm8, m8 +// * SUBB r8, m8 +// +func (self *Program) SUBB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SUBB", 2, Operands { v0, v1 }) + // SUBB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x2c) + m.imm1(toImmAny(v[0])) + }) + } + // SUBB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0x80) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SUBB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x28) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SUBB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // SUBB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x80) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SUBB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x28) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SUBB") + } + return p +} + +// SUBL performs "Subtract". 
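+//
+// Immediates that fit a sign-extended byte take the short 0x83 /5 encoding;
+// wider ones use 0x81 /5. A sketch, assuming a *Program p:
+//
+//     p.SUBL(8, ECX)         // imm8 form (83 /5 ib)
+//     p.SUBL(0x12345, ECX)   // imm32 form (81 /5 id)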
+// +// Mnemonic : SUB +// Supported forms : (8 forms) +// +// * SUBL imm32, eax +// * SUBL imm8, r32 +// * SUBL imm32, r32 +// * SUBL r32, r32 +// * SUBL m32, r32 +// * SUBL imm8, m32 +// * SUBL imm32, m32 +// * SUBL r32, m32 +// +func (self *Program) SUBL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SUBL", 2, Operands { v0, v1 }) + // SUBL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x2d) + m.imm4(toImmAny(v[0])) + }) + } + // SUBL imm8, r32 + if isImm8Ext(v0, 4) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SUBL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xe8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // SUBL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SUBL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x2b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // SUBL imm8, m32 + if isImm8Ext(v0, 4) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SUBL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(5, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // SUBL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SUBL") + } + return p +} + +// SUBPD performs "Subtract Packed Double-Precision Floating-Point Values". +// +// Mnemonic : SUBPD +// Supported forms : (2 forms) +// +// * SUBPD xmm, xmm [SSE2] +// * SUBPD m128, xmm [SSE2] +// +func (self *Program) SUBPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SUBPD", 2, Operands { v0, v1 }) + // SUBPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SUBPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SUBPD") + } + return p +} + +// SUBPS performs "Subtract Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : SUBPS +// Supported forms : (2 forms) +// +// * SUBPS xmm, xmm [SSE] +// * SUBPS m128, xmm [SSE] +// +func (self *Program) SUBPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SUBPS", 2, Operands { v0, v1 }) + // SUBPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SUBPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SUBPS") + } + return p +} + +// SUBQ performs "Subtract". +// +// Mnemonic : SUB +// Supported forms : (8 forms) +// +// * SUBQ imm32, rax +// * SUBQ imm8, r64 +// * SUBQ imm32, r64 +// * SUBQ r64, r64 +// * SUBQ m64, r64 +// * SUBQ imm8, m64 +// * SUBQ imm32, m64 +// * SUBQ r64, m64 +// +func (self *Program) SUBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SUBQ", 2, Operands { v0, v1 }) + // SUBQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x2d) + m.imm4(toImmAny(v[0])) + }) + } + // SUBQ imm8, r64 + if isImm8Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x83) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SUBQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x81) + m.emit(0xe8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // SUBQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SUBQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x2b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // SUBQ imm8, m64 + if isImm8Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x83) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SUBQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x81) + m.mrsd(5, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // SUBQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SUBQ") + } + return p +} + +// SUBSD performs "Subtract Scalar Double-Precision Floating-Point Values". 
+// +// Mnemonic : SUBSD +// Supported forms : (2 forms) +// +// * SUBSD xmm, xmm [SSE2] +// * SUBSD m64, xmm [SSE2] +// +func (self *Program) SUBSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SUBSD", 2, Operands { v0, v1 }) + // SUBSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SUBSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SUBSD") + } + return p +} + +// SUBSS performs "Subtract Scalar Single-Precision Floating-Point Values". +// +// Mnemonic : SUBSS +// Supported forms : (2 forms) +// +// * SUBSS xmm, xmm [SSE] +// * SUBSS m32, xmm [SSE] +// +func (self *Program) SUBSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SUBSS", 2, Operands { v0, v1 }) + // SUBSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SUBSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SUBSS") + } + return p +} + +// SUBW performs "Subtract". 
+// +// Mnemonic : SUB +// Supported forms : (8 forms) +// +// * SUBW imm16, ax +// * SUBW imm8, r16 +// * SUBW imm16, r16 +// * SUBW r16, r16 +// * SUBW m16, r16 +// * SUBW imm8, m16 +// * SUBW imm16, m16 +// * SUBW r16, m16 +// +func (self *Program) SUBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SUBW", 2, Operands { v0, v1 }) + // SUBW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x2d) + m.imm2(toImmAny(v[0])) + }) + } + // SUBW imm8, r16 + if isImm8Ext(v0, 2) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SUBW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xe8 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // SUBW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SUBW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x2b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // SUBW imm8, m16 + if isImm8Ext(v0, 2) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SUBW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(5, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // SUBW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SUBW") + } + return p +} + +// SYSCALL performs "Fast System Call". +// +// Mnemonic : SYSCALL +// Supported forms : (1 form) +// +// * SYSCALL +// +func (self *Program) SYSCALL() *Instruction { + p := self.alloc("SYSCALL", 0, Operands { }) + // SYSCALL + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x05) + }) + return p +} + +// T1MSKC performs "Inverse Mask From Trailing Ones". 
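+//
+// T1MSKC computes ~x | (x + 1), i.e. the inverse of the mask covering the
+// trailing one bits of the source. A sketch, assuming a *Program p:
+//
+//     p.T1MSKC(RAX, RBX)    // RBX = ^RAX | (RAX + 1)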
+// +// Mnemonic : T1MSKC +// Supported forms : (4 forms) +// +// * T1MSKC r32, r32 [TBM] +// * T1MSKC m32, r32 [TBM] +// * T1MSKC r64, r64 [TBM] +// * T1MSKC m64, r64 [TBM] +// +func (self *Program) T1MSKC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("T1MSKC", 2, Operands { v0, v1 }) + // T1MSKC r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xf8 | lcode(v[0])) + }) + } + // T1MSKC m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(7, addr(v[0]), 1) + }) + } + // T1MSKC r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xf8 | lcode(v[0])) + }) + } + // T1MSKC m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(7, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for T1MSKC") + } + return p +} + +// TESTB performs "Logical Compare". +// +// Mnemonic : TEST +// Supported forms : (5 forms) +// +// * TESTB imm8, al +// * TESTB imm8, r8 +// * TESTB r8, r8 +// * TESTB imm8, m8 +// * TESTB r8, m8 +// +func (self *Program) TESTB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("TESTB", 2, Operands { v0, v1 }) + // TESTB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xa8) + m.imm1(toImmAny(v[0])) + }) + } + // TESTB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // TESTB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x84) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // TESTB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xf6) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // TESTB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x84) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for TESTB") + } + return p +} + +// TESTL performs "Logical Compare". 
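+//
+// TEST is a non-destructive AND that only updates EFLAGS; testing a register
+// against itself is the idiomatic zero check. A sketch, assuming a *Program p:
+//
+//     p.TESTL(EAX, EAX)    // ZF=1 iff EAX == 0; EAX is unchanged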
+// +// Mnemonic : TEST +// Supported forms : (5 forms) +// +// * TESTL imm32, eax +// * TESTL imm32, r32 +// * TESTL r32, r32 +// * TESTL imm32, m32 +// * TESTL r32, m32 +// +func (self *Program) TESTL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("TESTL", 2, Operands { v0, v1 }) + // TESTL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xa9) + m.imm4(toImmAny(v[0])) + }) + } + // TESTL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // TESTL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x85) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // TESTL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xf7) + m.mrsd(0, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // TESTL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x85) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for TESTL") + } + return p +} + +// TESTQ performs "Logical Compare". +// +// Mnemonic : TEST +// Supported forms : (5 forms) +// +// * TESTQ imm32, rax +// * TESTQ imm32, r64 +// * TESTQ r64, r64 +// * TESTQ imm32, m64 +// * TESTQ r64, m64 +// +func (self *Program) TESTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("TESTQ", 2, Operands { v0, v1 }) + // TESTQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0xa9) + m.imm4(toImmAny(v[0])) + }) + } + // TESTQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // TESTQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x85) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // TESTQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xf7) + m.mrsd(0, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // TESTQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x85) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for TESTQ") + } + return p +} + +// TESTW performs "Logical Compare". 
+// +// Mnemonic : TEST +// Supported forms : (5 forms) +// +// * TESTW imm16, ax +// * TESTW imm16, r16 +// * TESTW r16, r16 +// * TESTW imm16, m16 +// * TESTW r16, m16 +// +func (self *Program) TESTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("TESTW", 2, Operands { v0, v1 }) + // TESTW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xa9) + m.imm2(toImmAny(v[0])) + }) + } + // TESTW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // TESTW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x85) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // TESTW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xf7) + m.mrsd(0, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // TESTW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x85) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for TESTW") + } + return p +} + +// TZCNTL performs "Count the Number of Trailing Zero Bits". +// +// Mnemonic : TZCNT +// Supported forms : (2 forms) +// +// * TZCNTL r32, r32 [BMI] +// * TZCNTL m32, r32 [BMI] +// +func (self *Program) TZCNTL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("TZCNTL", 2, Operands { v0, v1 }) + // TZCNTL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // TZCNTL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for TZCNTL") + } + return p +} + +// TZCNTQ performs "Count the Number of Trailing Zero Bits". 
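+//
+// Unlike BSF, TZCNT is well defined for a zero source: it returns the operand
+// width (64 here) and sets CF. A sketch, assuming a *Program p:
+//
+//     p.TZCNTQ(RAX, RCX)    // RCX = count of trailing zero bits in RAX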
+// +// Mnemonic : TZCNT +// Supported forms : (2 forms) +// +// * TZCNTQ r64, r64 [BMI] +// * TZCNTQ m64, r64 [BMI] +// +func (self *Program) TZCNTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("TZCNTQ", 2, Operands { v0, v1 }) + // TZCNTQ r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // TZCNTQ m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xbc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for TZCNTQ") + } + return p +} + +// TZCNTW performs "Count the Number of Trailing Zero Bits". +// +// Mnemonic : TZCNT +// Supported forms : (2 forms) +// +// * TZCNTW r16, r16 [BMI] +// * TZCNTW m16, r16 [BMI] +// +func (self *Program) TZCNTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("TZCNTW", 2, Operands { v0, v1 }) + // TZCNTW r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // TZCNTW m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for TZCNTW") + } + return p +} + +// TZMSK performs "Mask From Trailing Zeros". +// +// Mnemonic : TZMSK +// Supported forms : (4 forms) +// +// * TZMSK r32, r32 [TBM] +// * TZMSK m32, r32 [TBM] +// * TZMSK r64, r64 [TBM] +// * TZMSK m64, r64 [TBM] +// +func (self *Program) TZMSK(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("TZMSK", 2, Operands { v0, v1 }) + // TZMSK r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xe0 | lcode(v[0])) + }) + } + // TZMSK m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(4, addr(v[0]), 1) + }) + } + // TZMSK r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xe0 | lcode(v[0])) + }) + } + // TZMSK m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(4, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for TZMSK") + } + return p +} + +// UCOMISD performs "Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS". 
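+//
+// UCOMISD sets only ZF, PF and CF (the other arithmetic flags are cleared),
+// so the result is consumed with the unsigned condition codes (JA, JAE, JB,
+// ...), with PF flagging an unordered (NaN) compare. A sketch, assuming a
+// *Program p:
+//
+//     p.UCOMISD(XMM1, XMM0)    // compares XMM0 against XMM1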
+// +// Mnemonic : UCOMISD +// Supported forms : (2 forms) +// +// * UCOMISD xmm, xmm [SSE2] +// * UCOMISD m64, xmm [SSE2] +// +func (self *Program) UCOMISD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("UCOMISD", 2, Operands { v0, v1 }) + // UCOMISD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // UCOMISD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for UCOMISD") + } + return p +} + +// UCOMISS performs "Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS". +// +// Mnemonic : UCOMISS +// Supported forms : (2 forms) +// +// * UCOMISS xmm, xmm [SSE] +// * UCOMISS m32, xmm [SSE] +// +func (self *Program) UCOMISS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("UCOMISS", 2, Operands { v0, v1 }) + // UCOMISS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // UCOMISS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for UCOMISS") + } + return p +} + +// UD2 performs "Undefined Instruction". +// +// Mnemonic : UD2 +// Supported forms : (1 form) +// +// * UD2 +// +func (self *Program) UD2() *Instruction { + p := self.alloc("UD2", 0, Operands { }) + // UD2 + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x0b) + }) + return p +} + +// UNPCKHPD performs "Unpack and Interleave High Packed Double-Precision Floating-Point Values". +// +// Mnemonic : UNPCKHPD +// Supported forms : (2 forms) +// +// * UNPCKHPD xmm, xmm [SSE2] +// * UNPCKHPD m128, xmm [SSE2] +// +func (self *Program) UNPCKHPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("UNPCKHPD", 2, Operands { v0, v1 }) + // UNPCKHPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x15) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // UNPCKHPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x15) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for UNPCKHPD") + } + return p +} + +// UNPCKHPS performs "Unpack and Interleave High Packed Single-Precision Floating-Point Values". 
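+//
+// The interleave reads the high halves of both inputs; with the destination
+// last, the result is {dst[2], src[2], dst[3], src[3]}. A sketch, assuming a
+// *Program p:
+//
+//     p.UNPCKHPS(XMM1, XMM0)    // XMM0 = {XMM0[2], XMM1[2], XMM0[3], XMM1[3]}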
+// +// Mnemonic : UNPCKHPS +// Supported forms : (2 forms) +// +// * UNPCKHPS xmm, xmm [SSE] +// * UNPCKHPS m128, xmm [SSE] +// +func (self *Program) UNPCKHPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("UNPCKHPS", 2, Operands { v0, v1 }) + // UNPCKHPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x15) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // UNPCKHPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x15) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for UNPCKHPS") + } + return p +} + +// UNPCKLPD performs "Unpack and Interleave Low Packed Double-Precision Floating-Point Values". +// +// Mnemonic : UNPCKLPD +// Supported forms : (2 forms) +// +// * UNPCKLPD xmm, xmm [SSE2] +// * UNPCKLPD m128, xmm [SSE2] +// +func (self *Program) UNPCKLPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("UNPCKLPD", 2, Operands { v0, v1 }) + // UNPCKLPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x14) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // UNPCKLPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x14) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for UNPCKLPD") + } + return p +} + +// UNPCKLPS performs "Unpack and Interleave Low Packed Single-Precision Floating-Point Values". +// +// Mnemonic : UNPCKLPS +// Supported forms : (2 forms) +// +// * UNPCKLPS xmm, xmm [SSE] +// * UNPCKLPS m128, xmm [SSE] +// +func (self *Program) UNPCKLPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("UNPCKLPS", 2, Operands { v0, v1 }) + // UNPCKLPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x14) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // UNPCKLPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x14) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for UNPCKLPS") + } + return p +} + +// VADDPD performs "Add Packed Double-Precision Floating-Point Values". 
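+//
+// The VEX and EVEX forms are non-destructive three-operand adds: with this
+// package's ordering the last operand receives the sum. A sketch, assuming a
+// *Program p:
+//
+//     p.VADDPD(YMM1, YMM2, YMM3)    // YMM3 = YMM2 + YMM1 (four f64 lanes)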
+// +// Mnemonic : VADDPD +// Supported forms : (11 forms) +// +// * VADDPD xmm, xmm, xmm [AVX] +// * VADDPD m128, xmm, xmm [AVX] +// * VADDPD ymm, ymm, ymm [AVX] +// * VADDPD m256, ymm, ymm [AVX] +// * VADDPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VADDPD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VADDPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VADDPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VADDPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VADDPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VADDPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VADDPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VADDPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VADDPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VADDPD takes 3 or 4 operands") + } + // VADDPD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDPD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VADDPD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDPD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VADDPD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VADDPD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x58) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VADDPD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDPD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && 
isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VADDPD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDPD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VADDPD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VADDPD") + } + return p +} + +// VADDPS performs "Add Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VADDPS +// Supported forms : (11 forms) +// +// * VADDPS xmm, xmm, xmm [AVX] +// * VADDPS m128, xmm, xmm [AVX] +// * VADDPS ymm, ymm, ymm [AVX] +// * VADDPS m256, ymm, ymm [AVX] +// * VADDPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VADDPS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VADDPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VADDPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VADDPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VADDPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VADDPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VADDPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VADDPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VADDPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VADDPS takes 3 or 4 operands") + } + // VADDPS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDPS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VADDPS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDPS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VADDPS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VADDPS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x58) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VADDPS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDPS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && 
isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VADDPS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDPS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VADDPS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VADDPS") + } + return p +} + +// VADDSD performs "Add Scalar Double-Precision Floating-Point Values". 
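+//
+// The optional fourth operand carries the EVEX embedded-rounding {er}
+// selector; the plain three-operand call is the common case. A sketch,
+// assuming a *Program p:
+//
+//     p.VADDSD(XMM1, XMM2, XMM3)    // XMM3[63:0] = XMM2[63:0] + XMM1[63:0]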
+// +// Mnemonic : VADDSD +// Supported forms : (5 forms) +// +// * VADDSD xmm, xmm, xmm [AVX] +// * VADDSD m64, xmm, xmm [AVX] +// * VADDSD m64, xmm, xmm{k}{z} [AVX512F] +// * VADDSD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VADDSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VADDSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VADDSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VADDSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VADDSD takes 3 or 4 operands") + } + // VADDSD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDSD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VADDSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VADDSD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x58) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VADDSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VADDSD") + } + return p +} + +// VADDSS performs "Add Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VADDSS +// Supported forms : (5 forms) +// +// * VADDSS xmm, xmm, xmm [AVX] +// * VADDSS m32, xmm, xmm [AVX] +// * VADDSS m32, xmm, xmm{k}{z} [AVX512F] +// * VADDSS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VADDSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VADDSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VADDSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VADDSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VADDSS takes 3 or 4 operands") + } + // VADDSS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDSS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VADDSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VADDSS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x58) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VADDSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VADDSS") + } + return p +} + +// VADDSUBPD performs "Packed Double-FP Add/Subtract". 
+// +// Mnemonic : VADDSUBPD +// Supported forms : (4 forms) +// +// * VADDSUBPD xmm, xmm, xmm [AVX] +// * VADDSUBPD m128, xmm, xmm [AVX] +// * VADDSUBPD ymm, ymm, ymm [AVX] +// * VADDSUBPD m256, ymm, ymm [AVX] +// +func (self *Program) VADDSUBPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VADDSUBPD", 3, Operands { v0, v1, v2 }) + // VADDSUBPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDSUBPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd0) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VADDSUBPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDSUBPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd0) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VADDSUBPD") + } + return p +} + +// VADDSUBPS performs "Packed Single-FP Add/Subtract". +// +// Mnemonic : VADDSUBPS +// Supported forms : (4 forms) +// +// * VADDSUBPS xmm, xmm, xmm [AVX] +// * VADDSUBPS m128, xmm, xmm [AVX] +// * VADDSUBPS ymm, ymm, ymm [AVX] +// * VADDSUBPS m256, ymm, ymm [AVX] +// +func (self *Program) VADDSUBPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VADDSUBPS", 3, Operands { v0, v1, v2 }) + // VADDSUBPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDSUBPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd0) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VADDSUBPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDSUBPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd0) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VADDSUBPS") + } + return p +} + +// VAESDEC performs "Perform One Round of an AES Decryption Flow". 
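+//
+// Each call encodes one middle round of AES decryption: InvShiftRows,
+// InvSubBytes and InvMixColumns applied to the state, then an XOR with the
+// round key (the first operand here). A sketch, assuming a *Program p:
+//
+//     p.VAESDEC(XMM1, XMM0, XMM0)    // XMM0 = one decryption round of state XMM0 with key XMM1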
+// +// Mnemonic : VAESDEC +// Supported forms : (2 forms) +// +// * VAESDEC xmm, xmm, xmm [AES,AVX] +// * VAESDEC m128, xmm, xmm [AES,AVX] +// +func (self *Program) VAESDEC(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VAESDEC", 3, Operands { v0, v1, v2 }) + // VAESDEC xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xde) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VAESDEC m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xde) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VAESDEC") + } + return p +} + +// VAESDECLAST performs "Perform Last Round of an AES Decryption Flow". +// +// Mnemonic : VAESDECLAST +// Supported forms : (2 forms) +// +// * VAESDECLAST xmm, xmm, xmm [AES,AVX] +// * VAESDECLAST m128, xmm, xmm [AES,AVX] +// +func (self *Program) VAESDECLAST(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VAESDECLAST", 3, Operands { v0, v1, v2 }) + // VAESDECLAST xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VAESDECLAST m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VAESDECLAST") + } + return p +} + +// VAESENC performs "Perform One Round of an AES Encryption Flow". +// +// Mnemonic : VAESENC +// Supported forms : (2 forms) +// +// * VAESENC xmm, xmm, xmm [AES,AVX] +// * VAESENC m128, xmm, xmm [AES,AVX] +// +func (self *Program) VAESENC(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VAESENC", 3, Operands { v0, v1, v2 }) + // VAESENC xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VAESENC m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VAESENC") + } + return p +} + +// VAESENCLAST performs "Perform Last Round of an AES Encryption Flow". 
+// +// Mnemonic : VAESENCLAST +// Supported forms : (2 forms) +// +// * VAESENCLAST xmm, xmm, xmm [AES,AVX] +// * VAESENCLAST m128, xmm, xmm [AES,AVX] +// +func (self *Program) VAESENCLAST(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VAESENCLAST", 3, Operands { v0, v1, v2 }) + // VAESENCLAST xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VAESENCLAST m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdd) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VAESENCLAST") + } + return p +} + +// VAESIMC performs "Perform the AES InvMixColumn Transformation". +// +// Mnemonic : VAESIMC +// Supported forms : (2 forms) +// +// * VAESIMC xmm, xmm [AES,AVX] +// * VAESIMC m128, xmm [AES,AVX] +// +func (self *Program) VAESIMC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VAESIMC", 2, Operands { v0, v1 }) + // VAESIMC xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VAESIMC m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0xdb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VAESIMC") + } + return p +} + +// VAESKEYGENASSIST performs "AES Round Key Generation Assist". +// +// Mnemonic : VAESKEYGENASSIST +// Supported forms : (2 forms) +// +// * VAESKEYGENASSIST imm8, xmm, xmm [AES,AVX] +// * VAESKEYGENASSIST imm8, m128, xmm [AES,AVX] +// +func (self *Program) VAESKEYGENASSIST(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VAESKEYGENASSIST", 3, Operands { v0, v1, v2 }) + // VAESKEYGENASSIST imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VAESKEYGENASSIST imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VAESKEYGENASSIST") + } + return p +} + +// VALIGND performs "Align Doubleword Vectors". 
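+//
+// There is no VEX encoding of VALIGND, so even the xmm/ymm forms require
+// AVX-512 (F plus VL), and the immediate is passed first in this API. A
+// sketch, assuming the ZMM register constants and that a plain register is
+// accepted where zmm{k}{z} is listed:
+//
+//     p.VALIGND(3, ZMM2, ZMM1, ZMM0)  // zmm0 = dwords 3..18 of the zmm1:zmm2 concatenation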
+// +// Mnemonic : VALIGND +// Supported forms : (6 forms) +// +// * VALIGND imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VALIGND imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VALIGND imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VALIGND imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VALIGND imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VALIGND imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VALIGND(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VALIGND", 4, Operands { v0, v1, v2, v3 }) + // VALIGND imm8, m512/m32bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x03) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGND imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x03) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGND imm8, m128/m32bcst, xmm, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x03) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGND imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x03) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGND imm8, m256/m32bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x03) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGND imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x03) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VALIGND") + } + return p +} + +// VALIGNQ performs "Align Quadword Vectors". 
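+//
+// The encoders below differ from VALIGND's only in the second argument to
+// m.evex; comparing call sites across this file, that byte appears to pack
+// the mandatory-prefix selector in its low bits and EVEX.W in the 0x80 bit:
+//
+//     m.evex(0b11, 0x05, ...)  // VALIGND:  EVEX.66.0F3A.W0 (dword lanes)
+//     m.evex(0b11, 0x85, ...)  // VALIGNQ:  EVEX.66.0F3A.W1 (qword lanes)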
+// +// Mnemonic : VALIGNQ +// Supported forms : (6 forms) +// +// * VALIGNQ imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VALIGNQ imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VALIGNQ imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VALIGNQ imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VALIGNQ imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VALIGNQ imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VALIGNQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VALIGNQ", 4, Operands { v0, v1, v2, v3 }) + // VALIGNQ imm8, m512/m64bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x03) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGNQ imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x03) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGNQ imm8, m128/m64bcst, xmm, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x03) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGNQ imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x03) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGNQ imm8, m256/m64bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x03) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGNQ imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x03) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VALIGNQ") + } + return p +} + +// VANDNPD performs "Bitwise Logical AND NOT 
of Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VANDNPD +// Supported forms : (10 forms) +// +// * VANDNPD xmm, xmm, xmm [AVX] +// * VANDNPD m128, xmm, xmm [AVX] +// * VANDNPD ymm, ymm, ymm [AVX] +// * VANDNPD m256, ymm, ymm [AVX] +// * VANDNPD m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VANDNPD zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VANDNPD m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VANDNPD xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VANDNPD m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VANDNPD ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VANDNPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VANDNPD", 3, Operands { v0, v1, v2 }) + // VANDNPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDNPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VANDNPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDNPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VANDNPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VANDNPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDNPD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VANDNPD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + 
}) + } + // VANDNPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VANDNPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VANDNPD") + } + return p +} + +// VANDNPS performs "Bitwise Logical AND NOT of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VANDNPS +// Supported forms : (10 forms) +// +// * VANDNPS xmm, xmm, xmm [AVX] +// * VANDNPS m128, xmm, xmm [AVX] +// * VANDNPS ymm, ymm, ymm [AVX] +// * VANDNPS m256, ymm, ymm [AVX] +// * VANDNPS m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VANDNPS zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VANDNPS m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VANDNPS xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VANDNPS m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VANDNPS ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VANDNPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VANDNPS", 3, Operands { v0, v1, v2 }) + // VANDNPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDNPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VANDNPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDNPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VANDNPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VANDNPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + 
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDNPS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VANDNPS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDNPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VANDNPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VANDNPS") + } + return p +} + +// VANDPD performs "Bitwise Logical AND of Packed Double-Precision Floating-Point Values". 
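+//
+// As with VANDNPD/VANDNPS above, one mnemonic covers both the VEX and EVEX
+// encodings, chosen purely by operand kind: plain xmm/ymm operands (isXMM,
+// isYMM) take the legacy VEX path and need only AVX, while zmm, masked,
+// broadcast-memory, or EVEX-only register operands take an EVEX path and
+// need AVX512DQ (plus VL below 512 bits). A sketch, assuming the register
+// constants:
+//
+//     p.VANDPD(XMM1, XMM2, XMM3)  // VEX.128 encoding, requires ISA_AVX
+//     p.VANDPD(ZMM1, ZMM2, ZMM3)  // EVEX.512 encoding, requires ISA_AVX512DQ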
+// +// Mnemonic : VANDPD +// Supported forms : (10 forms) +// +// * VANDPD xmm, xmm, xmm [AVX] +// * VANDPD m128, xmm, xmm [AVX] +// * VANDPD ymm, ymm, ymm [AVX] +// * VANDPD m256, ymm, ymm [AVX] +// * VANDPD m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VANDPD zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VANDPD m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VANDPD xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VANDPD m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VANDPD ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VANDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VANDPD", 3, Operands { v0, v1, v2 }) + // VANDPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VANDPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VANDPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VANDPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDPD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VANDPD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) 
&& isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VANDPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VANDPD") + } + return p +} + +// VANDPS performs "Bitwise Logical AND of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VANDPS +// Supported forms : (10 forms) +// +// * VANDPS xmm, xmm, xmm [AVX] +// * VANDPS m128, xmm, xmm [AVX] +// * VANDPS ymm, ymm, ymm [AVX] +// * VANDPS m256, ymm, ymm [AVX] +// * VANDPS m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VANDPS zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VANDPS m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VANDPS xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VANDPS m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VANDPS ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VANDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VANDPS", 3, Operands { v0, v1, v2 }) + // VANDPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VANDPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VANDPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VANDPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x54) + m.emit(0xc0 | 
lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDPS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VANDPS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VANDPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VANDPS") + } + return p +} + +// VBLENDMPD performs "Blend Packed Double-Precision Floating-Point Vectors Using an OpMask Control". 
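+//
+// In the memory forms below, the trailing argument to m.mrsd (64, 32 or 16
+// here; 8 in VBROADCASTSD's m64 form) tracks the size of the memory access,
+// which is consistent with it being the EVEX disp8*N compression factor —
+// an inference from the generated call sites, not a documented contract:
+//
+//     m.mrsd(lcode(v[2]), addr(v[0]), 64)  // full zmm load: disp8 scaled by 64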
+// +// Mnemonic : VBLENDMPD +// Supported forms : (6 forms) +// +// * VBLENDMPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VBLENDMPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VBLENDMPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VBLENDMPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VBLENDMPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VBLENDMPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VBLENDMPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VBLENDMPD", 3, Operands { v0, v1, v2 }) + // VBLENDMPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VBLENDMPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VBLENDMPD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VBLENDMPD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VBLENDMPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VBLENDMPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VBLENDMPD") + } + return p +} + +// VBLENDMPS performs "Blend Packed Single-Precision Floating-Point Vectors Using an OpMask Control". 
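+//
+// The fourth prefix byte of the register forms below is assembled bit by
+// bit; read against the EVEX P2 layout (an inference from the constants
+// used, matching the other EVEX encoders in this file):
+//
+//     m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+//     // bit 7: z (zeroing)  bit 3: V' (inverted)  bits 2..0: aaa (mask register)
+//     // and 0x40/0x20/0x00 select the 512/256/128-bit vector length (L'L)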
+// +// Mnemonic : VBLENDMPS +// Supported forms : (6 forms) +// +// * VBLENDMPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VBLENDMPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VBLENDMPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VBLENDMPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VBLENDMPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VBLENDMPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VBLENDMPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VBLENDMPS", 3, Operands { v0, v1, v2 }) + // VBLENDMPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VBLENDMPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VBLENDMPS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VBLENDMPS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VBLENDMPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VBLENDMPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VBLENDMPS") + } + return p +} + +// VBLENDPD performs "Blend Packed Double Precision Floating-Point Values". 
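+//
+// The control byte is the first argument; bit i of imm8 selects element i of
+// the destination from the first (register/memory) source rather than the
+// second. A sketch, assuming the YMM register constants:
+//
+//     p.VBLENDPD(0b0101, YMM1, YMM2, YMM0)  // lanes 0,2 from ymm1; lanes 1,3 from ymm2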
+// +// Mnemonic : VBLENDPD +// Supported forms : (4 forms) +// +// * VBLENDPD imm8, xmm, xmm, xmm [AVX] +// * VBLENDPD imm8, m128, xmm, xmm [AVX] +// * VBLENDPD imm8, ymm, ymm, ymm [AVX] +// * VBLENDPD imm8, m256, ymm, ymm [AVX] +// +func (self *Program) VBLENDPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VBLENDPD", 4, Operands { v0, v1, v2, v3 }) + // VBLENDPD imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x0d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VBLENDPD imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0d) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VBLENDPD imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x0d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VBLENDPD imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0d) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VBLENDPD") + } + return p +} + +// VBLENDPS performs " Blend Packed Single Precision Floating-Point Values". 
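+//
+// Being 0F3A-map instructions, the blends can never use the compact two-byte
+// VEX prefix: the register forms emit the three-byte 0xC4 prefix inline,
+// while the memory forms go through m.vex3 so the helper can fold the
+// address's index/base extension bits into VEX.X/VEX.B (contrast VADDSUBPD
+// above, whose 0F-map register forms use m.vex2):
+//
+//     m.emit(0xc4)                   // register form: literal 3-byte VEX prefix
+//     m.vex3(0xc4, 0b11, 0x01, ...)  // memory form: helper derives X/B from addr(v[1])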
+// +// Mnemonic : VBLENDPS +// Supported forms : (4 forms) +// +// * VBLENDPS imm8, xmm, xmm, xmm [AVX] +// * VBLENDPS imm8, m128, xmm, xmm [AVX] +// * VBLENDPS imm8, ymm, ymm, ymm [AVX] +// * VBLENDPS imm8, m256, ymm, ymm [AVX] +// +func (self *Program) VBLENDPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VBLENDPS", 4, Operands { v0, v1, v2, v3 }) + // VBLENDPS imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x0c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VBLENDPS imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VBLENDPS imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x0c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VBLENDPS imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VBLENDPS") + } + return p +} + +// VBLENDVPD performs " Variable Blend Packed Double Precision Floating-Point Values". 
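+//
+// The selector register (first argument) is not encoded in ModRM at all: it
+// rides in the high nibble of a trailing byte, produced below by
+// m.emit(hlcode(v[0]) << 4) — the VEX "is4" operand convention. Selection is
+// per element, by the sign bit of each double in that register:
+//
+//     p.VBLENDVPD(XMM0, XMM1, XMM2, XMM3)  // xmm3[i] = sign(xmm0[i]) ? xmm1[i] : xmm2[i]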
+// +// Mnemonic : VBLENDVPD +// Supported forms : (4 forms) +// +// * VBLENDVPD xmm, xmm, xmm, xmm [AVX] +// * VBLENDVPD xmm, m128, xmm, xmm [AVX] +// * VBLENDVPD ymm, ymm, ymm, ymm [AVX] +// * VBLENDVPD ymm, m256, ymm, ymm [AVX] +// +func (self *Program) VBLENDVPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VBLENDVPD", 4, Operands { v0, v1, v2, v3 }) + // VBLENDVPD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VBLENDVPD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x4b) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VBLENDVPD ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VBLENDVPD ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x4b) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VBLENDVPD") + } + return p +} + +// VBLENDVPS performs " Variable Blend Packed Single Precision Floating-Point Values". 
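+//
+// Like VBLENDVPD, this instruction was never given an EVEX form, so only the
+// four VEX shapes exist; under AVX-512 the equivalent idiom is a compare into
+// a mask register followed by VBLENDMPS with that mask (see above).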
+// +// Mnemonic : VBLENDVPS +// Supported forms : (4 forms) +// +// * VBLENDVPS xmm, xmm, xmm, xmm [AVX] +// * VBLENDVPS xmm, m128, xmm, xmm [AVX] +// * VBLENDVPS ymm, ymm, ymm, ymm [AVX] +// * VBLENDVPS ymm, m256, ymm, ymm [AVX] +// +func (self *Program) VBLENDVPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VBLENDVPS", 4, Operands { v0, v1, v2, v3 }) + // VBLENDVPS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VBLENDVPS xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x4a) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VBLENDVPS ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VBLENDVPS ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x4a) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VBLENDVPS") + } + return p +} + +// VBROADCASTF128 performs "Broadcast 128 Bit of Floating-Point Data". +// +// Mnemonic : VBROADCASTF128 +// Supported forms : (1 form) +// +// * VBROADCASTF128 m128, ymm [AVX] +// +func (self *Program) VBROADCASTF128(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTF128", 2, Operands { v0, v1 }) + // VBROADCASTF128 m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x1a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTF128") + } + return p +} + +// VBROADCASTF32X2 performs "Broadcast Two Single-Precision Floating-Point Elements". 
+// +// Mnemonic : VBROADCASTF32X2 +// Supported forms : (4 forms) +// +// * VBROADCASTF32X2 xmm, zmm{k}{z} [AVX512DQ] +// * VBROADCASTF32X2 m64, zmm{k}{z} [AVX512DQ] +// * VBROADCASTF32X2 xmm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VBROADCASTF32X2 m64, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VBROADCASTF32X2(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTF32X2", 2, Operands { v0, v1 }) + // VBROADCASTF32X2 xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTF32X2 m64, zmm{k}{z} + if isM64(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VBROADCASTF32X2 xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTF32X2 m64, ymm{k}{z} + if isM64(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTF32X2") + } + return p +} + +// VBROADCASTF32X4 performs "Broadcast Four Single-Precision Floating-Point Elements". +// +// Mnemonic : VBROADCASTF32X4 +// Supported forms : (2 forms) +// +// * VBROADCASTF32X4 m128, zmm{k}{z} [AVX512F] +// * VBROADCASTF32X4 m128, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VBROADCASTF32X4(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTF32X4", 2, Operands { v0, v1 }) + // VBROADCASTF32X4 m128, zmm{k}{z} + if isM128(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VBROADCASTF32X4 m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTF32X4") + } + return p +} + +// VBROADCASTF32X8 performs "Broadcast Eight Single-Precision Floating-Point Elements". 
+// +// Mnemonic : VBROADCASTF32X8 +// Supported forms : (1 form) +// +// * VBROADCASTF32X8 m256, zmm{k}{z} [AVX512DQ] +// +func (self *Program) VBROADCASTF32X8(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTF32X8", 2, Operands { v0, v1 }) + // VBROADCASTF32X8 m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTF32X8") + } + return p +} + +// VBROADCASTF64X2 performs "Broadcast Two Double-Precision Floating-Point Elements". +// +// Mnemonic : VBROADCASTF64X2 +// Supported forms : (2 forms) +// +// * VBROADCASTF64X2 m128, zmm{k}{z} [AVX512DQ] +// * VBROADCASTF64X2 m128, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VBROADCASTF64X2(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTF64X2", 2, Operands { v0, v1 }) + // VBROADCASTF64X2 m128, zmm{k}{z} + if isM128(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VBROADCASTF64X2 m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTF64X2") + } + return p +} + +// VBROADCASTF64X4 performs "Broadcast Four Double-Precision Floating-Point Elements". +// +// Mnemonic : VBROADCASTF64X4 +// Supported forms : (1 form) +// +// * VBROADCASTF64X4 m256, zmm{k}{z} [AVX512F] +// +func (self *Program) VBROADCASTF64X4(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTF64X4", 2, Operands { v0, v1 }) + // VBROADCASTF64X4 m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTF64X4") + } + return p +} + +// VBROADCASTI128 performs "Broadcast 128 Bits of Integer Data". +// +// Mnemonic : VBROADCASTI128 +// Supported forms : (1 form) +// +// * VBROADCASTI128 m128, ymm [AVX2] +// +func (self *Program) VBROADCASTI128(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTI128", 2, Operands { v0, v1 }) + // VBROADCASTI128 m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTI128") + } + return p +} + +// VBROADCASTI32X2 performs "Broadcast Two Doubleword Elements". 
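+//
+// Within the VBROADCAST{F,I}nnXm family only the X2 variants accept a
+// register source; the X4/X8 variants above and below are memory-only,
+// matching the forms Intel defines. A sketch, assuming the register
+// constants:
+//
+//     p.VBROADCASTI32X2(XMM1, ZMM0)  // zmm0 = xmm1[63:0] repeated eight times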
+// +// Mnemonic : VBROADCASTI32X2 +// Supported forms : (6 forms) +// +// * VBROADCASTI32X2 xmm, zmm{k}{z} [AVX512DQ] +// * VBROADCASTI32X2 m64, zmm{k}{z} [AVX512DQ] +// * VBROADCASTI32X2 xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VBROADCASTI32X2 xmm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VBROADCASTI32X2 m64, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VBROADCASTI32X2 m64, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VBROADCASTI32X2(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTI32X2", 2, Operands { v0, v1 }) + // VBROADCASTI32X2 xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTI32X2 m64, zmm{k}{z} + if isM64(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VBROADCASTI32X2 xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTI32X2 xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTI32X2 m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VBROADCASTI32X2 m64, ymm{k}{z} + if isM64(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTI32X2") + } + return p +} + +// VBROADCASTI32X4 performs "Broadcast Four Doubleword Elements". 
+// +// Mnemonic : VBROADCASTI32X4 +// Supported forms : (2 forms) +// +// * VBROADCASTI32X4 m128, zmm{k}{z} [AVX512F] +// * VBROADCASTI32X4 m128, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VBROADCASTI32X4(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTI32X4", 2, Operands { v0, v1 }) + // VBROADCASTI32X4 m128, zmm{k}{z} + if isM128(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VBROADCASTI32X4 m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTI32X4") + } + return p +} + +// VBROADCASTI32X8 performs "Broadcast Eight Doubleword Elements". +// +// Mnemonic : VBROADCASTI32X8 +// Supported forms : (1 form) +// +// * VBROADCASTI32X8 m256, zmm{k}{z} [AVX512DQ] +// +func (self *Program) VBROADCASTI32X8(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTI32X8", 2, Operands { v0, v1 }) + // VBROADCASTI32X8 m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTI32X8") + } + return p +} + +// VBROADCASTI64X2 performs "Broadcast Two Quadword Elements". +// +// Mnemonic : VBROADCASTI64X2 +// Supported forms : (2 forms) +// +// * VBROADCASTI64X2 m128, zmm{k}{z} [AVX512DQ] +// * VBROADCASTI64X2 m128, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VBROADCASTI64X2(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTI64X2", 2, Operands { v0, v1 }) + // VBROADCASTI64X2 m128, zmm{k}{z} + if isM128(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VBROADCASTI64X2 m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTI64X2") + } + return p +} + +// VBROADCASTI64X4 performs "Broadcast Four Quadword Elements". 
+// +// Mnemonic : VBROADCASTI64X4 +// Supported forms : (1 form) +// +// * VBROADCASTI64X4 m256, zmm{k}{z} [AVX512F] +// +func (self *Program) VBROADCASTI64X4(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTI64X4", 2, Operands { v0, v1 }) + // VBROADCASTI64X4 m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTI64X4") + } + return p +} + +// VBROADCASTSD performs "Broadcast Double-Precision Floating-Point Element". +// +// Mnemonic : VBROADCASTSD +// Supported forms : (6 forms) +// +// * VBROADCASTSD m64, ymm [AVX] +// * VBROADCASTSD xmm, ymm [AVX2] +// * VBROADCASTSD xmm, zmm{k}{z} [AVX512F] +// * VBROADCASTSD m64, zmm{k}{z} [AVX512F] +// * VBROADCASTSD xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VBROADCASTSD m64, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VBROADCASTSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTSD", 2, Operands { v0, v1 }) + // VBROADCASTSD m64, ymm + if isM64(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VBROADCASTSD xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTSD xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTSD m64, zmm{k}{z} + if isM64(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VBROADCASTSD xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTSD m64, ymm{k}{z} + if isM64(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTSD") + } + return p +} + +// VBROADCASTSS performs "Broadcast Single-Precision Floating-Point Element". 
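+//
+// The memory-source forms date back to AVX, but the register-source forms
+// arrived only with AVX2, which is why one mnemonic toggles between ISA_AVX
+// and ISA_AVX2 below (VBROADCASTSD above behaves the same way). A sketch —
+// the Ptr-style memory constructor is an assumption, not verified here:
+//
+//     p.VBROADCASTSS(Ptr(RAX, 0), YMM0)  // AVX: broadcast the dword at [rax]
+//     p.VBROADCASTSS(XMM1, YMM0)         // AVX2: broadcast from a register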
+// +// Mnemonic : VBROADCASTSS +// Supported forms : (8 forms) +// +// * VBROADCASTSS m32, xmm [AVX] +// * VBROADCASTSS m32, ymm [AVX] +// * VBROADCASTSS xmm, xmm [AVX2] +// * VBROADCASTSS xmm, ymm [AVX2] +// * VBROADCASTSS xmm, zmm{k}{z} [AVX512F] +// * VBROADCASTSS m32, zmm{k}{z} [AVX512F] +// * VBROADCASTSS xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VBROADCASTSS m32, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VBROADCASTSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTSS", 2, Operands { v0, v1 }) + // VBROADCASTSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x18) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VBROADCASTSS m32, ymm + if isM32(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x18) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VBROADCASTSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x18) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTSS xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x18) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTSS xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x18) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTSS m32, zmm{k}{z} + if isM32(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x18) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VBROADCASTSS xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x18) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTSS m32, ymm{k}{z} + if isM32(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x18) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTSS") + } + return p +} + +// VCMPPD performs "Compare Packed Double-Precision Floating-Point Values". 
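+//
+// The leading imm8 selects the compare predicate (e.g. 0 = EQ_OQ, 1 = LT_OS).
+// A sketch with hypothetical operands; the {sae} form takes a fifth operand:
+//
+//     p.VCMPPD(0, xmm1, xmm2, xmm3)       // VEX form: xmm3 = (xmm2 == xmm1) per lane
+//     p.VCMPPD(0, sae, zmm1, zmm2, k1)    // EVEX {sae} form ("sae" token is hypothetical)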
+// +// Mnemonic : VCMPPD +// Supported forms : (11 forms) +// +// * VCMPPD imm8, xmm, xmm, xmm [AVX] +// * VCMPPD imm8, m128, xmm, xmm [AVX] +// * VCMPPD imm8, ymm, ymm, ymm [AVX] +// * VCMPPD imm8, m256, ymm, ymm [AVX] +// * VCMPPD imm8, m512/m64bcst, zmm, k{k} [AVX512F] +// * VCMPPD imm8, {sae}, zmm, zmm, k{k} [AVX512F] +// * VCMPPD imm8, zmm, zmm, k{k} [AVX512F] +// * VCMPPD imm8, m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VCMPPD imm8, xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VCMPPD imm8, m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VCMPPD imm8, ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VCMPPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCMPPD", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VCMPPD", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VCMPPD takes 4 or 5 operands") + } + // VCMPPD imm8, xmm, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, m128, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, ymm, ymm, ymm + if len(vv) == 0 && isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, m256, ymm, ymm + if len(vv) == 0 && isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, m512/m64bcst, zmm, k{k} + if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, {sae}, zmm, zmm, k{k} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isKk(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0xfd ^ (hlcode(v[3]) << 3)) + m.emit((0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, zmm, zmm, k{k} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | 
(ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, m128/m64bcst, xmm, k{k} + if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, xmm, xmm, k{k} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, m256/m64bcst, ymm, k{k} + if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, ymm, ymm, k{k} + if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCMPPD") + } + return p +} + +// VCMPPS performs "Compare Packed Single-Precision Floating-Point Values". 
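+//
+// As with VCMPPD, the EVEX forms write one result bit per lane into a k{k}
+// mask register instead of a vector destination. Hypothetical sketch:
+//
+//     p.VCMPPS(2, zmm1, zmm2, k1)    // predicate 2 = LE_OS, 16 result bits in k1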
+// +// Mnemonic : VCMPPS +// Supported forms : (11 forms) +// +// * VCMPPS imm8, xmm, xmm, xmm [AVX] +// * VCMPPS imm8, m128, xmm, xmm [AVX] +// * VCMPPS imm8, ymm, ymm, ymm [AVX] +// * VCMPPS imm8, m256, ymm, ymm [AVX] +// * VCMPPS imm8, m512/m32bcst, zmm, k{k} [AVX512F] +// * VCMPPS imm8, {sae}, zmm, zmm, k{k} [AVX512F] +// * VCMPPS imm8, zmm, zmm, k{k} [AVX512F] +// * VCMPPS imm8, m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VCMPPS imm8, xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VCMPPS imm8, m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VCMPPS imm8, ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VCMPPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCMPPS", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VCMPPS", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VCMPPS takes 4 or 5 operands") + } + // VCMPPS imm8, xmm, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, m128, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, ymm, ymm, ymm + if len(vv) == 0 && isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, m256, ymm, ymm + if len(vv) == 0 && isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, m512/m32bcst, zmm, k{k} + if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, {sae}, zmm, zmm, k{k} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isKk(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0x7c ^ (hlcode(v[3]) << 3)) + m.emit((0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, zmm, zmm, k{k} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | 
(ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, m128/m32bcst, xmm, k{k} + if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, xmm, xmm, k{k} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, m256/m32bcst, ymm, k{k} + if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, ymm, ymm, k{k} + if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCMPPS") + } + return p +} + +// VCMPSD performs "Compare Scalar Double-Precision Floating-Point Values". 
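+//
+// Only the low double-precision lane is compared; the m64 forms read a single
+// double from memory. Hypothetical sketch:
+//
+//     p.VCMPSD(1, mem64, xmm2, xmm3)    // predicate 1 = LT_OS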
+// +// Mnemonic : VCMPSD +// Supported forms : (5 forms) +// +// * VCMPSD imm8, xmm, xmm, xmm [AVX] +// * VCMPSD imm8, m64, xmm, xmm [AVX] +// * VCMPSD imm8, m64, xmm, k{k} [AVX512F] +// * VCMPSD imm8, {sae}, xmm, xmm, k{k} [AVX512F] +// * VCMPSD imm8, xmm, xmm, k{k} [AVX512F] +// +func (self *Program) VCMPSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCMPSD", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VCMPSD", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VCMPSD takes 4 or 5 operands") + } + // VCMPSD imm8, xmm, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPSD imm8, m64, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPSD imm8, m64, xmm, k{k} + if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 8) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPSD imm8, {sae}, xmm, xmm, k{k} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isKk(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0xff ^ (hlcode(v[3]) << 3)) + m.emit((0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPSD imm8, xmm, xmm, k{k} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCMPSD") + } + return p +} + +// VCMPSS performs "Compare Scalar Single-Precision Floating-Point Values". 
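+//
+// The variadic tail distinguishes the 4-operand forms from the 5-operand
+// {sae} form; any other arity panics in the dispatch switch. Hypothetical
+// sketch:
+//
+//     p.VCMPSS(0, mem32, xmm1, k2)    // m32 source, result bit in mask k2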
+// +// Mnemonic : VCMPSS +// Supported forms : (5 forms) +// +// * VCMPSS imm8, xmm, xmm, xmm [AVX] +// * VCMPSS imm8, m32, xmm, xmm [AVX] +// * VCMPSS imm8, m32, xmm, k{k} [AVX512F] +// * VCMPSS imm8, {sae}, xmm, xmm, k{k} [AVX512F] +// * VCMPSS imm8, xmm, xmm, k{k} [AVX512F] +// +func (self *Program) VCMPSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCMPSS", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VCMPSS", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VCMPSS takes 4 or 5 operands") + } + // VCMPSS imm8, xmm, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPSS imm8, m32, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPSS imm8, m32, xmm, k{k} + if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 4) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPSS imm8, {sae}, xmm, xmm, k{k} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isKk(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0x7e ^ (hlcode(v[3]) << 3)) + m.emit((0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPSS imm8, xmm, xmm, k{k} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCMPSS") + } + return p +} + +// VCOMISD performs "Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS". 
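+//
+// The result is reported through EFLAGS (ZF/PF/CF) rather than a destination
+// register, which is why no form carries a {k}{z} decorator. Hypothetical
+// sketch:
+//
+//     p.VCOMISD(mem64, xmm1)    // compare the low double of xmm1 against [mem]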
+// +// Mnemonic : VCOMISD +// Supported forms : (5 forms) +// +// * VCOMISD xmm, xmm [AVX] +// * VCOMISD m64, xmm [AVX] +// * VCOMISD m64, xmm [AVX512F] +// * VCOMISD {sae}, xmm, xmm [AVX512F] +// * VCOMISD xmm, xmm [AVX512F] +// +func (self *Program) VCOMISD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCOMISD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCOMISD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCOMISD takes 2 or 3 operands") + } + // VCOMISD xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x2f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCOMISD m64, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCOMISD m64, xmm + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2f) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCOMISD {sae}, xmm, xmm + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit(0x18) + m.emit(0x2f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCOMISD xmm, xmm + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit(0x48) + m.emit(0x2f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCOMISD") + } + return p +} + +// VCOMISS performs "Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS". 
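+//
+// The pairs of textually identical forms differ only in encoding: the first
+// "m32, xmm" / "xmm, xmm" is VEX (AVX), the second EVEX (AVX512F), chosen by
+// whether the register operand satisfies isXMM or isEVEXXMM. Hypothetical
+// sketch:
+//
+//     p.VCOMISS(xmm2, xmm1)    // VEX form for ordinary XMM registers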
+// +// Mnemonic : VCOMISS +// Supported forms : (5 forms) +// +// * VCOMISS xmm, xmm [AVX] +// * VCOMISS m32, xmm [AVX] +// * VCOMISS m32, xmm [AVX512F] +// * VCOMISS {sae}, xmm, xmm [AVX512F] +// * VCOMISS xmm, xmm [AVX512F] +// +func (self *Program) VCOMISS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCOMISS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCOMISS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCOMISS takes 2 or 3 operands") + } + // VCOMISS xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x2f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCOMISS m32, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCOMISS m32, xmm + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2f) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCOMISS {sae}, xmm, xmm + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c) + m.emit(0x18) + m.emit(0x2f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCOMISS xmm, xmm + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit(0x48) + m.emit(0x2f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCOMISS") + } + return p +} + +// VCOMPRESSPD performs "Store Sparse Packed Double-Precision Floating-Point Values into Dense Memory/Register". 
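+//
+// Active (unmasked) elements are packed contiguously into the destination.
+// The memory forms pass 8 as the final m.mrsd argument, which appears to be
+// the EVEX disp8*N scale (one element = 8 bytes). Hypothetical sketch:
+//
+//     p.VCOMPRESSPD(zmm1, mem512)    // store the selected doubles back-to-back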
+// +// Mnemonic : VCOMPRESSPD +// Supported forms : (6 forms) +// +// * VCOMPRESSPD zmm, zmm{k}{z} [AVX512F] +// * VCOMPRESSPD zmm, m512{k}{z} [AVX512F] +// * VCOMPRESSPD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCOMPRESSPD xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VCOMPRESSPD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VCOMPRESSPD ymm, m256{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCOMPRESSPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VCOMPRESSPD", 2, Operands { v0, v1 }) + // VCOMPRESSPD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x8a) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VCOMPRESSPD zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8a) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VCOMPRESSPD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x8a) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VCOMPRESSPD xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8a) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VCOMPRESSPD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x8a) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VCOMPRESSPD ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8a) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VCOMPRESSPD") + } + return p +} + +// VCOMPRESSPS performs "Store Sparse Packed Single-Precision Floating-Point Values into Dense Memory/Register". 
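+//
+// Identical shape to VCOMPRESSPD with 4-byte elements (m.mrsd scale of 4 in
+// the memory forms). Hypothetical sketch:
+//
+//     p.VCOMPRESSPS(ymm1, ymm2)    // register-to-register compress, {k}{z} capable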
+// +// Mnemonic : VCOMPRESSPS +// Supported forms : (6 forms) +// +// * VCOMPRESSPS zmm, zmm{k}{z} [AVX512F] +// * VCOMPRESSPS zmm, m512{k}{z} [AVX512F] +// * VCOMPRESSPS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCOMPRESSPS xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VCOMPRESSPS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VCOMPRESSPS ymm, m256{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCOMPRESSPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VCOMPRESSPS", 2, Operands { v0, v1 }) + // VCOMPRESSPS zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x8a) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VCOMPRESSPS zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8a) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VCOMPRESSPS xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x8a) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VCOMPRESSPS xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8a) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VCOMPRESSPS ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x8a) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VCOMPRESSPS ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8a) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VCOMPRESSPS") + } + return p +} + +// VCVTDQ2PD performs "Convert Packed Dword Integers to Packed Double-Precision FP Values". 
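+//
+// A widening conversion, so each source is half the destination width (m64 ->
+// xmm, m128 -> ymm, m256/m32bcst -> zmm). Hypothetical sketch:
+//
+//     p.VCVTDQ2PD(mem64, xmm1)    // two dwords -> two doubles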
+// +// Mnemonic : VCVTDQ2PD +// Supported forms : (10 forms) +// +// * VCVTDQ2PD xmm, xmm [AVX] +// * VCVTDQ2PD m64, xmm [AVX] +// * VCVTDQ2PD xmm, ymm [AVX] +// * VCVTDQ2PD m128, ymm [AVX] +// * VCVTDQ2PD m256/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTDQ2PD ymm, zmm{k}{z} [AVX512F] +// * VCVTDQ2PD m64/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTDQ2PD m128/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTDQ2PD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTDQ2PD xmm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTDQ2PD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VCVTDQ2PD", 2, Operands { v0, v1 }) + // VCVTDQ2PD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), v[0], 0) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTDQ2PD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTDQ2PD xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), v[0], 0) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTDQ2PD m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTDQ2PD m256/m32bcst, zmm{k}{z} + if isM256M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTDQ2PD ymm, zmm{k}{z} + if isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTDQ2PD m64/m32bcst, xmm{k}{z} + if isM64M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTDQ2PD m128/m32bcst, ymm{k}{z} + if isM128M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTDQ2PD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTDQ2PD xmm, ymm{k}{z} + if isEVEXXMM(v0) && 
isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTDQ2PD") + } + return p +} + +// VCVTDQ2PS performs "Convert Packed Dword Integers to Packed Single-Precision FP Values". +// +// Mnemonic : VCVTDQ2PS +// Supported forms : (11 forms) +// +// * VCVTDQ2PS xmm, xmm [AVX] +// * VCVTDQ2PS m128, xmm [AVX] +// * VCVTDQ2PS ymm, ymm [AVX] +// * VCVTDQ2PS m256, ymm [AVX] +// * VCVTDQ2PS m512/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTDQ2PS {er}, zmm, zmm{k}{z} [AVX512F] +// * VCVTDQ2PS zmm, zmm{k}{z} [AVX512F] +// * VCVTDQ2PS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTDQ2PS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTDQ2PS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTDQ2PS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTDQ2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTDQ2PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTDQ2PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTDQ2PS takes 2 or 3 operands") + } + // VCVTDQ2PS xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTDQ2PS m128, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTDQ2PS ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), v[0], 0) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTDQ2PS m256, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTDQ2PS m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTDQ2PS {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTDQ2PS zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | 
(ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTDQ2PS m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTDQ2PS m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTDQ2PS xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTDQ2PS ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTDQ2PS") + } + return p +} + +// VCVTPD2DQ performs "Convert Packed Double-Precision FP Values to Packed Dword Integers". 
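+//
+// A narrowing conversion (the zmm source form writes a ymm). The 3-operand
+// variant takes an {er} rounding-mode token as its first argument:
+//
+//     p.VCVTPD2DQ(er, zmm1, ymm2)    // "er" rounding token is hypothetical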
+// +// Mnemonic : VCVTPD2DQ +// Supported forms : (11 forms) +// +// * VCVTPD2DQ xmm, xmm [AVX] +// * VCVTPD2DQ ymm, xmm [AVX] +// * VCVTPD2DQ m128, xmm [AVX] +// * VCVTPD2DQ m256, xmm [AVX] +// * VCVTPD2DQ m512/m64bcst, ymm{k}{z} [AVX512F] +// * VCVTPD2DQ {er}, zmm, ymm{k}{z} [AVX512F] +// * VCVTPD2DQ zmm, ymm{k}{z} [AVX512F] +// * VCVTPD2DQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2DQ m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2DQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2DQ ymm, xmm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTPD2DQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPD2DQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPD2DQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPD2DQ takes 2 or 3 operands") + } + // VCVTPD2DQ xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), v[0], 0) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2DQ ymm, xmm + if len(vv) == 0 && isYMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[1]), v[0], 0) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2DQ m128, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPD2DQ m256, xmm + if len(vv) == 0 && isM256(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPD2DQ m512/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTPD2DQ {er}, zmm, ymm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPD2DQ zmm, ymm{k}{z} + if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2DQ m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + 
} + // VCVTPD2DQ m256/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPD2DQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2DQ ymm, xmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPD2DQ") + } + return p +} + +// VCVTPD2PS performs "Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values". +// +// Mnemonic : VCVTPD2PS +// Supported forms : (11 forms) +// +// * VCVTPD2PS xmm, xmm [AVX] +// * VCVTPD2PS ymm, xmm [AVX] +// * VCVTPD2PS m128, xmm [AVX] +// * VCVTPD2PS m256, xmm [AVX] +// * VCVTPD2PS m512/m64bcst, ymm{k}{z} [AVX512F] +// * VCVTPD2PS {er}, zmm, ymm{k}{z} [AVX512F] +// * VCVTPD2PS zmm, ymm{k}{z} [AVX512F] +// * VCVTPD2PS m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2PS m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2PS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2PS ymm, xmm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTPD2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPD2PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPD2PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPD2PS takes 2 or 3 operands") + } + // VCVTPD2PS xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2PS ymm, xmm + if len(vv) == 0 && isYMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2PS m128, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPD2PS m256, xmm + if len(vv) == 0 && isM256(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPD2PS m512/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m 
*_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTPD2PS {er}, zmm, ymm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPD2PS zmm, ymm{k}{z} + if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2PS m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTPD2PS m256/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPD2PS xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2PS ymm, xmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPD2PS") + } + return p +} + +// VCVTPD2QQ performs "Convert Packed Double-Precision Floating-Point Values to Packed Quadword Integers". 
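+//
+// Unlike VCVTPD2DQ, the 64-bit integer lanes stay full width, so every form
+// requires AVX512DQ (plus AVX512VL for the xmm/ymm forms). Hypothetical
+// sketch:
+//
+//     p.VCVTPD2QQ(zmm1, zmm2)    // eight doubles -> eight signed quadwords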
+// +// Mnemonic : VCVTPD2QQ +// Supported forms : (7 forms) +// +// * VCVTPD2QQ m512/m64bcst, zmm{k}{z} [AVX512DQ] +// * VCVTPD2QQ {er}, zmm, zmm{k}{z} [AVX512DQ] +// * VCVTPD2QQ zmm, zmm{k}{z} [AVX512DQ] +// * VCVTPD2QQ m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPD2QQ m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPD2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPD2QQ ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTPD2QQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPD2QQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPD2QQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPD2QQ takes 2 or 3 operands") + } + // VCVTPD2QQ m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7b) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTPD2QQ {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPD2QQ zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2QQ m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7b) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTPD2QQ m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPD2QQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2QQ ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7b) + m.emit(0xc0 | 
lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPD2QQ") + } + return p +} + +// VCVTPD2UDQ performs "Convert Packed Double-Precision Floating-Point Values to Packed Unsigned Doubleword Integers". +// +// Mnemonic : VCVTPD2UDQ +// Supported forms : (7 forms) +// +// * VCVTPD2UDQ m512/m64bcst, ymm{k}{z} [AVX512F] +// * VCVTPD2UDQ {er}, zmm, ymm{k}{z} [AVX512F] +// * VCVTPD2UDQ zmm, ymm{k}{z} [AVX512F] +// * VCVTPD2UDQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2UDQ m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2UDQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2UDQ ymm, xmm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTPD2UDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPD2UDQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPD2UDQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPD2UDQ takes 2 or 3 operands") + } + // VCVTPD2UDQ m512/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x84, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTPD2UDQ {er}, zmm, ymm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x79) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPD2UDQ zmm, ymm{k}{z} + if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2UDQ m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x84, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTPD2UDQ m256/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x84, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPD2UDQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2UDQ ymm, xmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + 
p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPD2UDQ") + } + return p +} + +// VCVTPD2UQQ performs "Convert Packed Double-Precision Floating-Point Values to Packed Unsigned Quadword Integers". +// +// Mnemonic : VCVTPD2UQQ +// Supported forms : (7 forms) +// +// * VCVTPD2UQQ m512/m64bcst, zmm{k}{z} [AVX512DQ] +// * VCVTPD2UQQ {er}, zmm, zmm{k}{z} [AVX512DQ] +// * VCVTPD2UQQ zmm, zmm{k}{z} [AVX512DQ] +// * VCVTPD2UQQ m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPD2UQQ m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPD2UQQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPD2UQQ ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTPD2UQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPD2UQQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPD2UQQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPD2UQQ takes 2 or 3 operands") + } + // VCVTPD2UQQ m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTPD2UQQ {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x79) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPD2UQQ zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2UQQ m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTPD2UQQ m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPD2UQQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) 
<< 7) | kcode(v[1]) | 0x08) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2UQQ ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPD2UQQ") + } + return p +} + +// VCVTPH2PS performs "Convert Half-Precision FP Values to Single-Precision FP Values". +// +// Mnemonic : VCVTPH2PS +// Supported forms : (11 forms) +// +// * VCVTPH2PS xmm, xmm [F16C] +// * VCVTPH2PS m64, xmm [F16C] +// * VCVTPH2PS xmm, ymm [F16C] +// * VCVTPH2PS m128, ymm [F16C] +// * VCVTPH2PS m256, zmm{k}{z} [AVX512F] +// * VCVTPH2PS {sae}, ymm, zmm{k}{z} [AVX512F] +// * VCVTPH2PS ymm, zmm{k}{z} [AVX512F] +// * VCVTPH2PS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPH2PS xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTPH2PS m64, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPH2PS m128, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTPH2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPH2PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPH2PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPH2PS takes 2 or 3 operands") + } + // VCVTPH2PS xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_F16C) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x13) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPH2PS m64, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) { + self.require(ISA_F16C) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x13) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPH2PS xmm, ymm + if len(vv) == 0 && isXMM(v0) && isYMM(v1) { + self.require(ISA_F16C) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x13) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPH2PS m128, ymm + if len(vv) == 0 && isM128(v0) && isYMM(v1) { + self.require(ISA_F16C) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x13) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPH2PS m256, zmm{k}{z} + if len(vv) == 0 && isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x13) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPH2PS {sae}, ymm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x13) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) 
+ } + // VCVTPH2PS ymm, zmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x13) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPH2PS xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x13) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPH2PS xmm, ymm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x13) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPH2PS m64, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x13) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTPH2PS m128, ymm{k}{z} + if len(vv) == 0 && isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x13) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPH2PS") + } + return p +} + +// VCVTPS2DQ performs "Convert Packed Single-Precision FP Values to Packed Dword Integers". 
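+// +// A minimal usage sketch (assuming a *Program value p and this package's +// register constants; operands are passed source-first, destination-last, +// mirroring the form list below): +// +//     p.VCVTPS2DQ(YMM2, YMM1)    // VCVTPS2DQ ymm, ymm [AVX]: YMM1 receives int32s converted from YMM2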
+// +// Mnemonic : VCVTPS2DQ +// Supported forms : (11 forms) +// +// * VCVTPS2DQ xmm, xmm [AVX] +// * VCVTPS2DQ m128, xmm [AVX] +// * VCVTPS2DQ ymm, ymm [AVX] +// * VCVTPS2DQ m256, ymm [AVX] +// * VCVTPS2DQ m512/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTPS2DQ {er}, zmm, zmm{k}{z} [AVX512F] +// * VCVTPS2DQ zmm, zmm{k}{z} [AVX512F] +// * VCVTPS2DQ m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2DQ m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2DQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2DQ ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTPS2DQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPS2DQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPS2DQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPS2DQ takes 2 or 3 operands") + } + // VCVTPS2DQ xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2DQ m128, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPS2DQ ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2DQ m256, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPS2DQ m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTPS2DQ {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPS2DQ zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2DQ m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + 
} + // VCVTPS2DQ m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPS2DQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2DQ ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPS2DQ") + } + return p +} + +// VCVTPS2PD performs "Convert Packed Single-Precision FP Values to Packed Double-Precision FP Values". +// +// Mnemonic : VCVTPS2PD +// Supported forms : (11 forms) +// +// * VCVTPS2PD xmm, xmm [AVX] +// * VCVTPS2PD m64, xmm [AVX] +// * VCVTPS2PD xmm, ymm [AVX] +// * VCVTPS2PD m128, ymm [AVX] +// * VCVTPS2PD m256/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTPS2PD {sae}, ymm, zmm{k}{z} [AVX512F] +// * VCVTPS2PD ymm, zmm{k}{z} [AVX512F] +// * VCVTPS2PD m64/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2PD m128/m32bcst, ymm{k}{z} [AVX512VL] +// * VCVTPS2PD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2PD xmm, ymm{k}{z} [AVX512VL] +// +func (self *Program) VCVTPS2PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPS2PD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPS2PD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPS2PD takes 2 or 3 operands") + } + // VCVTPS2PD xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2PD m64, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPS2PD xmm, ymm + if len(vv) == 0 && isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), v[0], 0) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2PD m128, ymm + if len(vv) == 0 && isM128(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPS2PD m256/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v 
[]interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPS2PD {sae}, ymm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPS2PD ymm, zmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2PD m64/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTPS2PD m128/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTPS2PD xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2PD xmm, ymm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPS2PD") + } + return p +} + +// VCVTPS2PH performs "Convert Single-Precision FP value to Half-Precision FP value". 
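+// +// A minimal usage sketch (assuming a *Program value p; the leading imm8 is the +// rounding-control immediate, where 0 selects round-to-nearest-even): +// +//     p.VCVTPS2PH(0, YMM1, XMM0)    // VCVTPS2PH imm8, ymm, xmm [F16C]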
+// +// Mnemonic : VCVTPS2PH +// Supported forms : (11 forms) +// +// * VCVTPS2PH imm8, xmm, xmm [F16C] +// * VCVTPS2PH imm8, ymm, xmm [F16C] +// * VCVTPS2PH imm8, xmm, m64 [F16C] +// * VCVTPS2PH imm8, ymm, m128 [F16C] +// * VCVTPS2PH imm8, zmm, m256{k}{z} [AVX512F] +// * VCVTPS2PH imm8, {sae}, zmm, ymm{k}{z} [AVX512F] +// * VCVTPS2PH imm8, zmm, ymm{k}{z} [AVX512F] +// * VCVTPS2PH imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2PH imm8, xmm, m64{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2PH imm8, ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2PH imm8, ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTPS2PH(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPS2PH", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VCVTPS2PH", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VCVTPS2PH takes 3 or 4 operands") + } + // VCVTPS2PH imm8, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_F16C) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x79) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, ymm, xmm + if len(vv) == 0 && isImm8(v0) && isYMM(v1) && isXMM(v2) { + self.require(ISA_F16C) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x7d) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, xmm, m64 + if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isM64(v2) { + self.require(ISA_F16C) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, ymm, m128 + if len(vv) == 0 && isImm8(v0) && isYMM(v1) && isM128(v2) { + self.require(ISA_F16C) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[1]), addr(v[2]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, zmm, m256{k}{z} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isM256kz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[2]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, {sae}, zmm, ymm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isYMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[3]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[3])) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, zmm, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | 
kcode(v[2]) | 0x48) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, xmm, m64{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isM64kz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[2]), 8) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, ymm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, ymm, m128{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPS2PH") + } + return p +} + +// VCVTPS2QQ performs "Convert Packed Single Precision Floating-Point Values to Packed Signed Quadword Integer Values".
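+// +// A minimal usage sketch (assuming a *Program value p and the package's XMM +// register constants; per the form lists, the destination may also carry an +// opmask, written {k}{z}): +// +//     p.VCVTPS2QQ(XMM3, XMM1)    // VCVTPS2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL], unmasked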
+// +// Mnemonic : VCVTPS2QQ +// Supported forms : (7 forms) +// +// * VCVTPS2QQ m256/m32bcst, zmm{k}{z} [AVX512DQ] +// * VCVTPS2QQ {er}, ymm, zmm{k}{z} [AVX512DQ] +// * VCVTPS2QQ ymm, zmm{k}{z} [AVX512DQ] +// * VCVTPS2QQ m64/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPS2QQ m128/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPS2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPS2QQ xmm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTPS2QQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPS2QQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPS2QQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPS2QQ takes 2 or 3 operands") + } + // VCVTPS2QQ m256/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPS2QQ {er}, ymm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPS2QQ ymm, zmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2QQ m64/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7b) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTPS2QQ m128/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7b) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTPS2QQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2QQ xmm, ymm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7b) + m.emit(0xc0 | 
lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPS2QQ") + } + return p +} + +// VCVTPS2UDQ performs "Convert Packed Single-Precision Floating-Point Values to Packed Unsigned Doubleword Integer Values". +// +// Mnemonic : VCVTPS2UDQ +// Supported forms : (7 forms) +// +// * VCVTPS2UDQ m512/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTPS2UDQ {er}, zmm, zmm{k}{z} [AVX512F] +// * VCVTPS2UDQ zmm, zmm{k}{z} [AVX512F] +// * VCVTPS2UDQ m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2UDQ m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2UDQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2UDQ ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTPS2UDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPS2UDQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPS2UDQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPS2UDQ takes 2 or 3 operands") + } + // VCVTPS2UDQ m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTPS2UDQ {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x79) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPS2UDQ zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2UDQ m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTPS2UDQ m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPS2UDQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2UDQ ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = 
DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPS2UDQ") + } + return p +} + +// VCVTPS2UQQ performs "Convert Packed Single Precision Floating-Point Values to Packed Unsigned Quadword Integer Values". +// +// Mnemonic : VCVTPS2UQQ +// Supported forms : (7 forms) +// +// * VCVTPS2UQQ m256/m32bcst, zmm{k}{z} [AVX512DQ] +// * VCVTPS2UQQ {er}, ymm, zmm{k}{z} [AVX512DQ] +// * VCVTPS2UQQ ymm, zmm{k}{z} [AVX512DQ] +// * VCVTPS2UQQ m64/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPS2UQQ m128/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPS2UQQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPS2UQQ xmm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTPS2UQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPS2UQQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPS2UQQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPS2UQQ takes 2 or 3 operands") + } + // VCVTPS2UQQ m256/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPS2UQQ {er}, ymm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x79) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPS2UQQ ymm, zmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2UQQ m64/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTPS2UQQ m128/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTPS2UQQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) 
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2UQQ xmm, ymm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPS2UQQ") + } + return p +} + +// VCVTQQ2PD performs "Convert Packed Quadword Integers to Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VCVTQQ2PD +// Supported forms : (7 forms) +// +// * VCVTQQ2PD m512/m64bcst, zmm{k}{z} [AVX512DQ] +// * VCVTQQ2PD {er}, zmm, zmm{k}{z} [AVX512DQ] +// * VCVTQQ2PD zmm, zmm{k}{z} [AVX512DQ] +// * VCVTQQ2PD m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTQQ2PD m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTQQ2PD xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTQQ2PD ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTQQ2PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTQQ2PD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTQQ2PD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTQQ2PD takes 2 or 3 operands") + } + // VCVTQQ2PD m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTQQ2PD {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTQQ2PD zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTQQ2PD m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTQQ2PD m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTQQ2PD xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + 
self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTQQ2PD ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTQQ2PD") + } + return p +} + +// VCVTQQ2PS performs "Convert Packed Quadword Integers to Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VCVTQQ2PS +// Supported forms : (7 forms) +// +// * VCVTQQ2PS m512/m64bcst, ymm{k}{z} [AVX512DQ] +// * VCVTQQ2PS {er}, zmm, ymm{k}{z} [AVX512DQ] +// * VCVTQQ2PS zmm, ymm{k}{z} [AVX512DQ] +// * VCVTQQ2PS m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTQQ2PS m256/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTQQ2PS xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTQQ2PS ymm, xmm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTQQ2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTQQ2PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTQQ2PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTQQ2PS takes 2 or 3 operands") + } + // VCVTQQ2PS m512/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x84, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTQQ2PS {er}, zmm, ymm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTQQ2PS zmm, ymm{k}{z} + if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTQQ2PS m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x84, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTQQ2PS m256/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 
0x84, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTQQ2PS xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTQQ2PS ymm, xmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTQQ2PS") + } + return p +} + +// VCVTSD2SI performs "Convert Scalar Double-Precision FP Value to Integer". +// +// Mnemonic : VCVTSD2SI +// Supported forms : (10 forms) +// +// * VCVTSD2SI xmm, r32 [AVX] +// * VCVTSD2SI m64, r32 [AVX] +// * VCVTSD2SI xmm, r64 [AVX] +// * VCVTSD2SI m64, r64 [AVX] +// * VCVTSD2SI m64, r32 [AVX512F] +// * VCVTSD2SI m64, r64 [AVX512F] +// * VCVTSD2SI {er}, xmm, r32 [AVX512F] +// * VCVTSD2SI {er}, xmm, r64 [AVX512F] +// * VCVTSD2SI xmm, r32 [AVX512F] +// * VCVTSD2SI xmm, r64 [AVX512F] +// +func (self *Program) VCVTSD2SI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTSD2SI", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTSD2SI", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTSD2SI takes 2 or 3 operands") + } + // VCVTSD2SI xmm, r32 + if len(vv) == 0 && isXMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), v[0], 0) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTSD2SI m64, r32 + if len(vv) == 0 && isM64(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTSD2SI xmm, r64 + if len(vv) == 0 && isXMM(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfb) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTSD2SI m64, r64 + if len(vv) == 0 && isM64(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x83, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTSD2SI m64, r32 + if len(vv) == 0 && isM64(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTSD2SI m64, r64 + if len(vv) == 0 && isM64(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTSD2SI {er}, xmm, r32 + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg32(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f) + m.emit((vcode(v[0]) << 5) | 0x18) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTSD2SI {er}, xmm, r64 + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg64(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff) + m.emit((vcode(v[0]) << 5) | 0x18) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTSD2SI xmm, r32 + if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit(0x48) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTSD2SI xmm, r64 + if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit(0x48) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTSD2SI") + } + return p +} + +// VCVTSD2SS performs "Convert Scalar Double-Precision FP Value to Scalar Single-Precision FP Value". 
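+// +// A minimal usage sketch (assuming a *Program value p; the middle operand is +// the first source, which supplies the untouched upper elements of the +// destination): +// +//     p.VCVTSD2SS(XMM2, XMM1, XMM0)    // VCVTSD2SS xmm, xmm, xmm [AVX]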
+// +// Mnemonic : VCVTSD2SS +// Supported forms : (5 forms) +// +// * VCVTSD2SS xmm, xmm, xmm [AVX] +// * VCVTSD2SS m64, xmm, xmm [AVX] +// * VCVTSD2SS m64, xmm, xmm{k}{z} [AVX512F] +// * VCVTSD2SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VCVTSD2SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VCVTSD2SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTSD2SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VCVTSD2SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VCVTSD2SS takes 3 or 4 operands") + } + // VCVTSD2SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTSD2SS m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VCVTSD2SS m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VCVTSD2SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VCVTSD2SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTSD2SS") + } + return p +} + +// VCVTSD2USI performs "Convert Scalar Double-Precision Floating-Point Value to Unsigned Doubleword Integer". 
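+// +// A minimal usage sketch (assuming a *Program value p and the package's +// general-purpose register constants such as EAX): +// +//     p.VCVTSD2USI(XMM0, EAX)    // VCVTSD2USI xmm, r32 [AVX512F]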
+// +// Mnemonic : VCVTSD2USI +// Supported forms : (6 forms) +// +// * VCVTSD2USI m64, r32 [AVX512F] +// * VCVTSD2USI m64, r64 [AVX512F] +// * VCVTSD2USI {er}, xmm, r32 [AVX512F] +// * VCVTSD2USI {er}, xmm, r64 [AVX512F] +// * VCVTSD2USI xmm, r32 [AVX512F] +// * VCVTSD2USI xmm, r64 [AVX512F] +// +func (self *Program) VCVTSD2USI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTSD2USI", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTSD2USI", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTSD2USI takes 2 or 3 operands") + } + // VCVTSD2USI m64, r32 + if len(vv) == 0 && isM64(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTSD2USI m64, r64 + if len(vv) == 0 && isM64(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTSD2USI {er}, xmm, r32 + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg32(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f) + m.emit((vcode(v[0]) << 5) | 0x18) + m.emit(0x79) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTSD2USI {er}, xmm, r64 + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg64(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff) + m.emit((vcode(v[0]) << 5) | 0x18) + m.emit(0x79) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTSD2USI xmm, r32 + if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit(0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTSD2USI xmm, r64 + if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit(0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTSD2USI") + } + return p +} + +// VCVTSI2SD performs "Convert Dword Integer to Scalar Double-Precision FP Value". 
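+// +// A minimal usage sketch (assuming a *Program value p; the integer source +// comes first, the XMM destination last): +// +//     p.VCVTSI2SD(EAX, XMM1, XMM0)    // VCVTSI2SD r32, xmm, xmm [AVX]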
+// +// Mnemonic : VCVTSI2SD +// Supported forms : (9 forms) +// +// * VCVTSI2SD r32, xmm, xmm [AVX] +// * VCVTSI2SD r64, xmm, xmm [AVX] +// * VCVTSI2SD m32, xmm, xmm [AVX] +// * VCVTSI2SD m64, xmm, xmm [AVX] +// * VCVTSI2SD r32, xmm, xmm [AVX512F] +// * VCVTSI2SD m32, xmm, xmm [AVX512F] +// * VCVTSI2SD m64, xmm, xmm [AVX512F] +// * VCVTSI2SD {er}, r64, xmm, xmm [AVX512F] +// * VCVTSI2SD r64, xmm, xmm [AVX512F] +// +func (self *Program) VCVTSI2SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTSI2SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VCVTSI2SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VCVTSI2SD takes 3 or 4 operands") + } + // VCVTSI2SD r32, xmm, xmm + if len(vv) == 0 && isReg32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTSI2SD r64, xmm, xmm + if len(vv) == 0 && isReg64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfb ^ (hlcode(v[1]) << 3)) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTSI2SD m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VCVTSI2SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x83, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VCVTSI2SD r32, xmm, xmm + if len(vv) == 0 && isReg32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTSI2SD m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x2a) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VCVTSI2SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x2a) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VCVTSI2SD {er}, r64, xmm, xmm + if len(vv) == 1 && isER(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((vcode(v[0]) << 5) | 
(0x08 ^ (ecode(v[2]) << 3)) | 0x10) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VCVTSI2SD r64, xmm, xmm + if len(vv) == 0 && isReg64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTSI2SD") + } + return p +} + +// VCVTSI2SS performs "Convert Dword Integer to Scalar Single-Precision FP Value". +// +// Mnemonic : VCVTSI2SS +// Supported forms : (10 forms) +// +// * VCVTSI2SS r32, xmm, xmm [AVX] +// * VCVTSI2SS r64, xmm, xmm [AVX] +// * VCVTSI2SS m32, xmm, xmm [AVX] +// * VCVTSI2SS m64, xmm, xmm [AVX] +// * VCVTSI2SS m32, xmm, xmm [AVX512F] +// * VCVTSI2SS m64, xmm, xmm [AVX512F] +// * VCVTSI2SS {er}, r32, xmm, xmm [AVX512F] +// * VCVTSI2SS {er}, r64, xmm, xmm [AVX512F] +// * VCVTSI2SS r32, xmm, xmm [AVX512F] +// * VCVTSI2SS r64, xmm, xmm [AVX512F] +// +func (self *Program) VCVTSI2SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTSI2SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VCVTSI2SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VCVTSI2SS takes 3 or 4 operands") + } + // VCVTSI2SS r32, xmm, xmm + if len(vv) == 0 && isReg32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTSI2SS r64, xmm, xmm + if len(vv) == 0 && isReg64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfa ^ (hlcode(v[1]) << 3)) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTSI2SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VCVTSI2SS m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x82, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VCVTSI2SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x2a) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VCVTSI2SS m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x2a) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VCVTSI2SS {er}, r32, xmm, xmm + if len(vv) == 1 && isER(v0) 
&& isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VCVTSI2SS {er}, r64, xmm, xmm + if len(vv) == 1 && isER(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfe ^ (hlcode(v[2]) << 3)) + m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VCVTSI2SS r32, xmm, xmm + if len(vv) == 0 && isReg32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTSI2SS r64, xmm, xmm + if len(vv) == 0 && isReg64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTSI2SS") + } + return p +} + +// VCVTSS2SD performs "Convert Scalar Single-Precision FP Value to Scalar Double-Precision FP Value". 
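+//
+// A usage sketch (illustrative; the register names and the SAE constant are
+// assumed from the iasm x86_64 package, not defined in this file):
+//
+//     p.VCVTSS2SD(XMM0, XMM1, XMM2)         // xmm, xmm, xmm [AVX]
+//     p.VCVTSS2SD(SAE, XMM0, XMM1, XMM2)    // {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
+//
+// As with the other conversions, the optional {sae} suppress-all-exceptions
+// modifier is passed first when present.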
+// +// Mnemonic : VCVTSS2SD +// Supported forms : (5 forms) +// +// * VCVTSS2SD xmm, xmm, xmm [AVX] +// * VCVTSS2SD m32, xmm, xmm [AVX] +// * VCVTSS2SD m32, xmm, xmm{k}{z} [AVX512F] +// * VCVTSS2SD {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VCVTSS2SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VCVTSS2SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTSS2SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VCVTSS2SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VCVTSS2SD takes 3 or 4 operands") + } + // VCVTSS2SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTSS2SD m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VCVTSS2SD m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VCVTSS2SD {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VCVTSS2SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTSS2SD") + } + return p +} + +// VCVTSS2SI performs "Convert Scalar Single-Precision FP Value to Dword Integer". 
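+//
+// A usage sketch (illustrative; the register constants are assumed names from
+// the iasm x86_64 package):
+//
+//     p.VCVTSS2SI(XMM3, EAX)    // xmm, r32
+//     p.VCVTSS2SI(XMM3, RAX)    // xmm, r64
+//
+// Memory-source forms (m32) take a memory operand instead and are encoded via
+// addr(v[0]) and m.mrsd(...) rather than a register ModRM byte.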
+// +// Mnemonic : VCVTSS2SI +// Supported forms : (10 forms) +// +// * VCVTSS2SI xmm, r32 [AVX] +// * VCVTSS2SI m32, r32 [AVX] +// * VCVTSS2SI xmm, r64 [AVX] +// * VCVTSS2SI m32, r64 [AVX] +// * VCVTSS2SI m32, r32 [AVX512F] +// * VCVTSS2SI m32, r64 [AVX512F] +// * VCVTSS2SI {er}, xmm, r32 [AVX512F] +// * VCVTSS2SI {er}, xmm, r64 [AVX512F] +// * VCVTSS2SI xmm, r32 [AVX512F] +// * VCVTSS2SI xmm, r64 [AVX512F] +// +func (self *Program) VCVTSS2SI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTSS2SI", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTSS2SI", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTSS2SI takes 2 or 3 operands") + } + // VCVTSS2SI xmm, r32 + if len(vv) == 0 && isXMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), v[0], 0) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTSS2SI m32, r32 + if len(vv) == 0 && isM32(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTSS2SI xmm, r64 + if len(vv) == 0 && isXMM(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfa) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTSS2SI m32, r64 + if len(vv) == 0 && isM32(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x82, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTSS2SI m32, r32 + if len(vv) == 0 && isM32(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCVTSS2SI m32, r64 + if len(vv) == 0 && isM32(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCVTSS2SI {er}, xmm, r32 + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg32(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e) + m.emit((vcode(v[0]) << 5) | 0x18) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTSS2SI {er}, xmm, r64 + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg64(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe) + m.emit((vcode(v[0]) << 5) | 0x18) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTSS2SI xmm, r32 + if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ 
((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTSS2SI xmm, r64 + if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTSS2SI") + } + return p +} + +// VCVTSS2USI performs "Convert Scalar Single-Precision Floating-Point Value to Unsigned Doubleword Integer". +// +// Mnemonic : VCVTSS2USI +// Supported forms : (6 forms) +// +// * VCVTSS2USI m32, r32 [AVX512F] +// * VCVTSS2USI m32, r64 [AVX512F] +// * VCVTSS2USI {er}, xmm, r32 [AVX512F] +// * VCVTSS2USI {er}, xmm, r64 [AVX512F] +// * VCVTSS2USI xmm, r32 [AVX512F] +// * VCVTSS2USI xmm, r64 [AVX512F] +// +func (self *Program) VCVTSS2USI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTSS2USI", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTSS2USI", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTSS2USI takes 2 or 3 operands") + } + // VCVTSS2USI m32, r32 + if len(vv) == 0 && isM32(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCVTSS2USI m32, r64 + if len(vv) == 0 && isM32(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCVTSS2USI {er}, xmm, r32 + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg32(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e) + m.emit((vcode(v[0]) << 5) | 0x18) + m.emit(0x79) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTSS2USI {er}, xmm, r64 + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg64(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe) + m.emit((vcode(v[0]) << 5) | 0x18) + m.emit(0x79) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTSS2USI xmm, r32 + if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTSS2USI xmm, r64 + if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + 
} + if p.len == 0 { + panic("invalid operands for VCVTSS2USI") + } + return p +} + +// VCVTTPD2DQ performs "Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers". +// +// Mnemonic : VCVTTPD2DQ +// Supported forms : (11 forms) +// +// * VCVTTPD2DQ xmm, xmm [AVX] +// * VCVTTPD2DQ ymm, xmm [AVX] +// * VCVTTPD2DQ m128, xmm [AVX] +// * VCVTTPD2DQ m256, xmm [AVX] +// * VCVTTPD2DQ m512/m64bcst, ymm{k}{z} [AVX512F] +// * VCVTTPD2DQ {sae}, zmm, ymm{k}{z} [AVX512F] +// * VCVTTPD2DQ zmm, ymm{k}{z} [AVX512F] +// * VCVTTPD2DQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPD2DQ m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPD2DQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPD2DQ ymm, xmm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTTPD2DQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTPD2DQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTPD2DQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTPD2DQ takes 2 or 3 operands") + } + // VCVTTPD2DQ xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2DQ ymm, xmm + if len(vv) == 0 && isYMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2DQ m128, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTTPD2DQ m256, xmm + if len(vv) == 0 && isM256(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTTPD2DQ m512/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTTPD2DQ {sae}, zmm, ymm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isYMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTPD2DQ zmm, ymm{k}{z} + if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2DQ m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + 
p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTTPD2DQ m256/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTTPD2DQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2DQ ymm, xmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTPD2DQ") + } + return p +} + +// VCVTTPD2QQ performs "Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Quadword Integers". +// +// Mnemonic : VCVTTPD2QQ +// Supported forms : (7 forms) +// +// * VCVTTPD2QQ m512/m64bcst, zmm{k}{z} [AVX512DQ] +// * VCVTTPD2QQ {sae}, zmm, zmm{k}{z} [AVX512DQ] +// * VCVTTPD2QQ zmm, zmm{k}{z} [AVX512DQ] +// * VCVTTPD2QQ m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPD2QQ m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPD2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPD2QQ ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTTPD2QQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTPD2QQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTPD2QQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTPD2QQ takes 2 or 3 operands") + } + // VCVTTPD2QQ m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTTPD2QQ {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTPD2QQ zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 
0x48) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2QQ m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTTPD2QQ m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTTPD2QQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2QQ ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTPD2QQ") + } + return p +} + +// VCVTTPD2UDQ performs "Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Unsigned Doubleword Integers". 
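+//
+// A usage sketch (illustrative; register constants and the SAE value are
+// assumed package names). The VCVTT* forms truncate toward zero instead of
+// honoring the current MXCSR rounding mode, and the packed doubles narrow
+// into a half-width destination:
+//
+//     p.VCVTTPD2UDQ(ZMM1, YMM0)         // zmm, ymm{k}{z}
+//     p.VCVTTPD2UDQ(SAE, ZMM1, YMM0)    // {sae}, zmm, ymm{k}{z}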
+// +// Mnemonic : VCVTTPD2UDQ +// Supported forms : (7 forms) +// +// * VCVTTPD2UDQ m512/m64bcst, ymm{k}{z} [AVX512F] +// * VCVTTPD2UDQ {sae}, zmm, ymm{k}{z} [AVX512F] +// * VCVTTPD2UDQ zmm, ymm{k}{z} [AVX512F] +// * VCVTTPD2UDQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPD2UDQ m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPD2UDQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPD2UDQ ymm, xmm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTTPD2UDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTPD2UDQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTPD2UDQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTPD2UDQ takes 2 or 3 operands") + } + // VCVTTPD2UDQ m512/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x84, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTTPD2UDQ {sae}, zmm, ymm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isYMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTPD2UDQ zmm, ymm{k}{z} + if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2UDQ m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x84, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTTPD2UDQ m256/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x84, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTTPD2UDQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2UDQ ymm, xmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x78) + m.emit(0xc0 | 
lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTPD2UDQ") + } + return p +} + +// VCVTTPD2UQQ performs "Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Unsigned Quadword Integers". +// +// Mnemonic : VCVTTPD2UQQ +// Supported forms : (7 forms) +// +// * VCVTTPD2UQQ m512/m64bcst, zmm{k}{z} [AVX512DQ] +// * VCVTTPD2UQQ {sae}, zmm, zmm{k}{z} [AVX512DQ] +// * VCVTTPD2UQQ zmm, zmm{k}{z} [AVX512DQ] +// * VCVTTPD2UQQ m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPD2UQQ m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPD2UQQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPD2UQQ ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTTPD2UQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTPD2UQQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTPD2UQQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTPD2UQQ takes 2 or 3 operands") + } + // VCVTTPD2UQQ m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTTPD2UQQ {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTPD2UQQ zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2UQQ m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTTPD2UQQ m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTTPD2UQQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2UQQ ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | 
ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTPD2UQQ") + } + return p +} + +// VCVTTPS2DQ performs "Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers". +// +// Mnemonic : VCVTTPS2DQ +// Supported forms : (11 forms) +// +// * VCVTTPS2DQ xmm, xmm [AVX] +// * VCVTTPS2DQ m128, xmm [AVX] +// * VCVTTPS2DQ ymm, ymm [AVX] +// * VCVTTPS2DQ m256, ymm [AVX] +// * VCVTTPS2DQ m512/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTTPS2DQ {sae}, zmm, zmm{k}{z} [AVX512F] +// * VCVTTPS2DQ zmm, zmm{k}{z} [AVX512F] +// * VCVTTPS2DQ m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPS2DQ m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPS2DQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPS2DQ ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTTPS2DQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTPS2DQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTPS2DQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTPS2DQ takes 2 or 3 operands") + } + // VCVTTPS2DQ xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), v[0], 0) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2DQ m128, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTTPS2DQ ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), v[0], 0) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2DQ m256, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTTPS2DQ m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTTPS2DQ {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTPS2DQ zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2DQ m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTTPS2DQ m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTTPS2DQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2DQ ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTPS2DQ") + } + return p +} + +// VCVTTPS2QQ performs "Convert with Truncation Packed Single Precision Floating-Point Values to Packed Signed Quadword Integer Values".
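+//
+// A usage sketch (illustrative; register constants are assumed package names).
+// Each single-precision lane widens to a signed quadword, so the register
+// source is half the destination width:
+//
+//     p.VCVTTPS2QQ(YMM2, ZMM0)    // ymm, zmm{k}{z}
+//     p.VCVTTPS2QQ(XMM2, YMM0)    // xmm, ymm{k}{z} [AVX512VL]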
+// +// Mnemonic : VCVTTPS2QQ +// Supported forms : (7 forms) +// +// * VCVTTPS2QQ m256/m32bcst, zmm{k}{z} [AVX512DQ] +// * VCVTTPS2QQ {sae}, ymm, zmm{k}{z} [AVX512DQ] +// * VCVTTPS2QQ ymm, zmm{k}{z} [AVX512DQ] +// * VCVTTPS2QQ m64/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPS2QQ m128/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPS2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPS2QQ xmm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTTPS2QQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTPS2QQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTPS2QQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTPS2QQ takes 2 or 3 operands") + } + // VCVTTPS2QQ m256/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTTPS2QQ {sae}, ymm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTPS2QQ ymm, zmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2QQ m64/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTTPS2QQ m128/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTTPS2QQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2QQ xmm, ymm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7a) + m.emit(0xc0 | 
lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTPS2QQ") + } + return p +} + +// VCVTTPS2UDQ performs "Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Unsigned Doubleword Integer Values". +// +// Mnemonic : VCVTTPS2UDQ +// Supported forms : (7 forms) +// +// * VCVTTPS2UDQ m512/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTTPS2UDQ {sae}, zmm, zmm{k}{z} [AVX512F] +// * VCVTTPS2UDQ zmm, zmm{k}{z} [AVX512F] +// * VCVTTPS2UDQ m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPS2UDQ m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPS2UDQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPS2UDQ ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTTPS2UDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTPS2UDQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTPS2UDQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTPS2UDQ takes 2 or 3 operands") + } + // VCVTTPS2UDQ m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTTPS2UDQ {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTPS2UDQ zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2UDQ m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTTPS2UDQ m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTTPS2UDQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2UDQ ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) 
+ p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTPS2UDQ") + } + return p +} + +// VCVTTPS2UQQ performs "Convert with Truncation Packed Single Precision Floating-Point Values to Packed Unsigned Quadword Integer Values". +// +// Mnemonic : VCVTTPS2UQQ +// Supported forms : (7 forms) +// +// * VCVTTPS2UQQ m256/m32bcst, zmm{k}{z} [AVX512DQ] +// * VCVTTPS2UQQ {sae}, ymm, zmm{k}{z} [AVX512DQ] +// * VCVTTPS2UQQ ymm, zmm{k}{z} [AVX512DQ] +// * VCVTTPS2UQQ m64/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPS2UQQ m128/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPS2UQQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPS2UQQ xmm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTTPS2UQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTPS2UQQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTPS2UQQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTPS2UQQ takes 2 or 3 operands") + } + // VCVTTPS2UQQ m256/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTTPS2UQQ {sae}, ymm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTPS2UQQ ymm, zmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2UQQ m64/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTTPS2UQQ m128/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTTPS2UQQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2UQQ xmm, ymm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTPS2UQQ") + } + return p +} + +// VCVTTSD2SI performs "Convert with Truncation Scalar Double-Precision FP Value to Signed Integer". +// +// Mnemonic : VCVTTSD2SI +// Supported forms : (10 forms) +// +// * VCVTTSD2SI xmm, r32 [AVX] +// * VCVTTSD2SI m64, r32 [AVX] +// * VCVTTSD2SI xmm, r64 [AVX] +// * VCVTTSD2SI m64, r64 [AVX] +// * VCVTTSD2SI m64, r32 [AVX512F] +// * VCVTTSD2SI m64, r64 [AVX512F] +// * VCVTTSD2SI {sae}, xmm, r32 [AVX512F] +// * VCVTTSD2SI {sae}, xmm, r64 [AVX512F] +// * VCVTTSD2SI xmm, r32 [AVX512F] +// * VCVTTSD2SI xmm, r64 [AVX512F] +// +func (self *Program) VCVTTSD2SI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTSD2SI", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTSD2SI", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTSD2SI takes 2 or 3 operands") + } + // VCVTTSD2SI xmm, r32 + if len(vv) == 0 && isXMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), v[0], 0) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTSD2SI m64, r32 + if len(vv) == 0 && isM64(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTTSD2SI xmm, r64 + if len(vv) == 0 && isXMM(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfb) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTSD2SI m64, r64 + if len(vv) == 0 && isM64(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x83, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTTSD2SI m64, r32 + if len(vv) == 0 && isM64(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTTSD2SI m64, r64 + if len(vv) == 0 && isM64(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTTSD2SI {sae}, xmm, r32 + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg32(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f) + m.emit(0x18) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTSD2SI {sae}, xmm, r64 + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg64(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff) + m.emit(0x18) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTSD2SI xmm, r32 + if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit(0x48) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTSD2SI xmm, r64 + if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit(0x48) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTSD2SI") + } + return p +} + +// VCVTTSD2USI performs "Convert with Truncation Scalar Double-Precision Floating-Point Value to Unsigned Integer". +// +// Mnemonic : VCVTTSD2USI +// Supported forms : (6 forms) +// +// * VCVTTSD2USI m64, r32 [AVX512F] +// * VCVTTSD2USI m64, r64 [AVX512F] +// * VCVTTSD2USI {sae}, xmm, r32 [AVX512F] +// * VCVTTSD2USI {sae}, xmm, r64 [AVX512F] +// * VCVTTSD2USI xmm, r32 [AVX512F] +// * VCVTTSD2USI xmm, r64 [AVX512F] +// +func (self *Program) VCVTTSD2USI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTSD2USI", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTSD2USI", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTSD2USI takes 2 or 3 operands") + } + // VCVTTSD2USI m64, r32 + if len(vv) == 0 && isM64(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTTSD2USI m64, r64 + if len(vv) == 0 && isM64(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTTSD2USI {sae}, xmm, r32 + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg32(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f) + m.emit(0x18) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTSD2USI {sae}, xmm, r64 + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg64(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff) + m.emit(0x18) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // 
VCVTTSD2USI xmm, r32 + if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit(0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTSD2USI xmm, r64 + if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit(0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTSD2USI") + } + return p +} + +// VCVTTSS2SI performs "Convert with Truncation Scalar Single-Precision FP Value to Dword Integer". +// +// Mnemonic : VCVTTSS2SI +// Supported forms : (10 forms) +// +// * VCVTTSS2SI xmm, r32 [AVX] +// * VCVTTSS2SI m32, r32 [AVX] +// * VCVTTSS2SI xmm, r64 [AVX] +// * VCVTTSS2SI m32, r64 [AVX] +// * VCVTTSS2SI m32, r32 [AVX512F] +// * VCVTTSS2SI m32, r64 [AVX512F] +// * VCVTTSS2SI {sae}, xmm, r32 [AVX512F] +// * VCVTTSS2SI {sae}, xmm, r64 [AVX512F] +// * VCVTTSS2SI xmm, r32 [AVX512F] +// * VCVTTSS2SI xmm, r64 [AVX512F] +// +func (self *Program) VCVTTSS2SI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTSS2SI", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTSS2SI", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTSS2SI takes 2 or 3 operands") + } + // VCVTTSS2SI xmm, r32 + if len(vv) == 0 && isXMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), v[0], 0) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTSS2SI m32, r32 + if len(vv) == 0 && isM32(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTTSS2SI xmm, r64 + if len(vv) == 0 && isXMM(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfa) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTSS2SI m32, r64 + if len(vv) == 0 && isM32(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x82, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTTSS2SI m32, r32 + if len(vv) == 0 && isM32(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCVTTSS2SI m32, r64 + if len(vv) == 0 && isM32(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCVTTSS2SI {sae}, xmm, r32 + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && 
isReg32(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e) + m.emit(0x18) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTSS2SI {sae}, xmm, r64 + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg64(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe) + m.emit(0x18) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTSS2SI xmm, r32 + if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTSS2SI xmm, r64 + if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTSS2SI") + } + return p +} + +// VCVTTSS2USI performs "Convert with Truncation Scalar Single-Precision Floating-Point Value to Unsigned Integer". +// +// Mnemonic : VCVTTSS2USI +// Supported forms : (6 forms) +// +// * VCVTTSS2USI m32, r32 [AVX512F] +// * VCVTTSS2USI m32, r64 [AVX512F] +// * VCVTTSS2USI {sae}, xmm, r32 [AVX512F] +// * VCVTTSS2USI {sae}, xmm, r64 [AVX512F] +// * VCVTTSS2USI xmm, r32 [AVX512F] +// * VCVTTSS2USI xmm, r64 [AVX512F] +// +func (self *Program) VCVTTSS2USI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTSS2USI", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTSS2USI", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTSS2USI takes 2 or 3 operands") + } + // VCVTTSS2USI m32, r32 + if len(vv) == 0 && isM32(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCVTTSS2USI m32, r64 + if len(vv) == 0 && isM32(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCVTTSS2USI {sae}, xmm, r32 + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg32(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e) + m.emit(0x18) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTSS2USI {sae}, xmm, r64 + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg64(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ 
((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe) + m.emit(0x18) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTSS2USI xmm, r32 + if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTSS2USI xmm, r64 + if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTSS2USI") + } + return p +} + +// VCVTUDQ2PD performs "Convert Packed Unsigned Doubleword Integers to Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VCVTUDQ2PD +// Supported forms : (6 forms) +// +// * VCVTUDQ2PD m256/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTUDQ2PD ymm, zmm{k}{z} [AVX512F] +// * VCVTUDQ2PD m64/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTUDQ2PD m128/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTUDQ2PD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTUDQ2PD xmm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTUDQ2PD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VCVTUDQ2PD", 2, Operands { v0, v1 }) + // VCVTUDQ2PD m256/m32bcst, zmm{k}{z} + if isM256M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTUDQ2PD ymm, zmm{k}{z} + if isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTUDQ2PD m64/m32bcst, xmm{k}{z} + if isM64M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTUDQ2PD m128/m32bcst, ymm{k}{z} + if isM128M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTUDQ2PD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTUDQ2PD xmm, ymm{k}{z} + if isEVEXXMM(v0) && 
isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTUDQ2PD") + } + return p +} + +// VCVTUDQ2PS performs "Convert Packed Unsigned Doubleword Integers to Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VCVTUDQ2PS +// Supported forms : (7 forms) +// +// * VCVTUDQ2PS m512/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTUDQ2PS {er}, zmm, zmm{k}{z} [AVX512F] +// * VCVTUDQ2PS zmm, zmm{k}{z} [AVX512F] +// * VCVTUDQ2PS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTUDQ2PS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTUDQ2PS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTUDQ2PS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTUDQ2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTUDQ2PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTUDQ2PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTUDQ2PS takes 2 or 3 operands") + } + // VCVTUDQ2PS m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTUDQ2PS {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTUDQ2PS zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTUDQ2PS m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTUDQ2PS m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTUDQ2PS xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | 
(ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTUDQ2PS ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTUDQ2PS") + } + return p +} + +// VCVTUQQ2PD performs "Convert Packed Unsigned Quadword Integers to Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VCVTUQQ2PD +// Supported forms : (7 forms) +// +// * VCVTUQQ2PD m512/m64bcst, zmm{k}{z} [AVX512DQ] +// * VCVTUQQ2PD {er}, zmm, zmm{k}{z} [AVX512DQ] +// * VCVTUQQ2PD zmm, zmm{k}{z} [AVX512DQ] +// * VCVTUQQ2PD m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTUQQ2PD m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTUQQ2PD xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTUQQ2PD ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTUQQ2PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTUQQ2PD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTUQQ2PD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTUQQ2PD takes 2 or 3 operands") + } + // VCVTUQQ2PD m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTUQQ2PD {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTUQQ2PD zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTUQQ2PD m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTUQQ2PD m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } 
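+    // The register-register forms of this function hand-assemble the 62h EVEX
+    // prefix with four m.emit() calls (m.evex() is only used when a memory
+    // operand is present):
+    //
+    //   0x62                   escape byte
+    //   0xf1 ^ (hcode(dst)<<7 | ehcode(src)<<5 | ecode(dst)<<4)
+    //                          P0: base 0b11110001 selects the 0F opcode map;
+    //                          the XOR clears the inverted R/X/B/R' extension
+    //                          bits to reach registers 8..31
+    //   0xfe                   P1: W=1 (quadword elements), inverted
+    //                          vvvv=1111 (unused), pp=0b10 (F3 prefix)
+    //   (zcode<<7)|kcode|...   P2: zeroing bit, opmask aaa, and the L'L
+    //                          length field: 0x08 = 128-bit, 0x28 = 256-bit,
+    //                          0x48 = 512-bit (bit 3 is the inverted V')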
+ // VCVTUQQ2PD xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTUQQ2PD ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTUQQ2PD") + } + return p +} + +// VCVTUQQ2PS performs "Convert Packed Unsigned Quadword Integers to Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VCVTUQQ2PS +// Supported forms : (7 forms) +// +// * VCVTUQQ2PS m512/m64bcst, ymm{k}{z} [AVX512DQ] +// * VCVTUQQ2PS {er}, zmm, ymm{k}{z} [AVX512DQ] +// * VCVTUQQ2PS zmm, ymm{k}{z} [AVX512DQ] +// * VCVTUQQ2PS m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTUQQ2PS m256/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTUQQ2PS xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTUQQ2PS ymm, xmm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTUQQ2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTUQQ2PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTUQQ2PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTUQQ2PS takes 2 or 3 operands") + } + // VCVTUQQ2PS m512/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTUQQ2PS {er}, zmm, ymm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTUQQ2PS zmm, ymm{k}{z} + if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTUQQ2PS m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTUQQ2PS m256/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) { + 
self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTUQQ2PS xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTUQQ2PS ymm, xmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTUQQ2PS") + } + return p +} + +// VCVTUSI2SD performs "Convert Unsigned Integer to Scalar Double-Precision Floating-Point Value". +// +// Mnemonic : VCVTUSI2SD +// Supported forms : (5 forms) +// +// * VCVTUSI2SD r32, xmm, xmm [AVX512F] +// * VCVTUSI2SD m32, xmm, xmm [AVX512F] +// * VCVTUSI2SD m64, xmm, xmm [AVX512F] +// * VCVTUSI2SD {er}, r64, xmm, xmm [AVX512F] +// * VCVTUSI2SD r64, xmm, xmm [AVX512F] +// +func (self *Program) VCVTUSI2SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTUSI2SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VCVTUSI2SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VCVTUSI2SD takes 3 or 4 operands") + } + // VCVTUSI2SD r32, xmm, xmm + if len(vv) == 0 && isReg32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTUSI2SD m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x7b) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VCVTUSI2SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x7b) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VCVTUSI2SD {er}, r64, xmm, xmm + if len(vv) == 1 && isER(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[3]) << 3 | 
lcode(v[1])) + }) + } + // VCVTUSI2SD r64, xmm, xmm + if len(vv) == 0 && isReg64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTUSI2SD") + } + return p +} + +// VCVTUSI2SS performs "Convert Unsigned Integer to Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : VCVTUSI2SS +// Supported forms : (6 forms) +// +// * VCVTUSI2SS m32, xmm, xmm [AVX512F] +// * VCVTUSI2SS m64, xmm, xmm [AVX512F] +// * VCVTUSI2SS {er}, r32, xmm, xmm [AVX512F] +// * VCVTUSI2SS {er}, r64, xmm, xmm [AVX512F] +// * VCVTUSI2SS r32, xmm, xmm [AVX512F] +// * VCVTUSI2SS r64, xmm, xmm [AVX512F] +// +func (self *Program) VCVTUSI2SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTUSI2SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VCVTUSI2SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VCVTUSI2SS takes 3 or 4 operands") + } + // VCVTUSI2SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x7b) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VCVTUSI2SS m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x7b) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VCVTUSI2SS {er}, r32, xmm, xmm + if len(vv) == 1 && isER(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VCVTUSI2SS {er}, r64, xmm, xmm + if len(vv) == 1 && isER(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfe ^ (hlcode(v[2]) << 3)) + m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VCVTUSI2SS r32, xmm, xmm + if len(vv) == 0 && isReg32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTUSI2SS r64, xmm, xmm + if len(vv) == 0 && isReg64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + 
self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTUSI2SS") + } + return p +} + +// VDBPSADBW performs "Double Block Packed Sum-Absolute-Differences on Unsigned Bytes". +// +// Mnemonic : VDBPSADBW +// Supported forms : (6 forms) +// +// * VDBPSADBW imm8, zmm, zmm, zmm{k}{z} [AVX512BW] +// * VDBPSADBW imm8, m512, zmm, zmm{k}{z} [AVX512BW] +// * VDBPSADBW imm8, xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VDBPSADBW imm8, m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VDBPSADBW imm8, ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VDBPSADBW imm8, m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VDBPSADBW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VDBPSADBW", 4, Operands { v0, v1, v2, v3 }) + // VDBPSADBW imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x42) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VDBPSADBW imm8, m512, zmm, zmm{k}{z} + if isImm8(v0) && isM512(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x42) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VDBPSADBW imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x42) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VDBPSADBW imm8, m128, xmm, xmm{k}{z} + if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x42) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VDBPSADBW imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x42) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VDBPSADBW imm8, m256, ymm, ymm{k}{z} + if isImm8(v0) && 
isM256(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x42) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VDBPSADBW") + } + return p +} + +// VDIVPD performs "Divide Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VDIVPD +// Supported forms : (11 forms) +// +// * VDIVPD xmm, xmm, xmm [AVX] +// * VDIVPD m128, xmm, xmm [AVX] +// * VDIVPD ymm, ymm, ymm [AVX] +// * VDIVPD m256, ymm, ymm [AVX] +// * VDIVPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VDIVPD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VDIVPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VDIVPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VDIVPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VDIVPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VDIVPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VDIVPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VDIVPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VDIVPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VDIVPD takes 3 or 4 operands") + } + // VDIVPD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVPD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VDIVPD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVPD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VDIVPD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VDIVPD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VDIVPD zmm, zmm, zmm{k}{z} + if len(vv) == 0 
&& isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVPD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VDIVPD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVPD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VDIVPD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VDIVPD") + } + return p +} + +// VDIVPS performs "Divide Packed Single-Precision Floating-Point Values". 
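+//
+// The first argument to m.vex2() in the VEX forms below is the pre-packed
+// prefix field: bit 2 is VEX.L and bits 1:0 are the implied-prefix code pp
+// (0 = none, 1 = 66h, 2 = F3h, 3 = F2h). Hence 0 and 4 for the 128- and
+// 256-bit VDIVPS forms, 1 and 5 for VDIVPD above, and 3 and 2 for the scalar
+// VDIVSD/VDIVSS below.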
+// +// Mnemonic : VDIVPS +// Supported forms : (11 forms) +// +// * VDIVPS xmm, xmm, xmm [AVX] +// * VDIVPS m128, xmm, xmm [AVX] +// * VDIVPS ymm, ymm, ymm [AVX] +// * VDIVPS m256, ymm, ymm [AVX] +// * VDIVPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VDIVPS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VDIVPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VDIVPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VDIVPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VDIVPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VDIVPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VDIVPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VDIVPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VDIVPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VDIVPS takes 3 or 4 operands") + } + // VDIVPS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVPS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VDIVPS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVPS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VDIVPS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VDIVPS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VDIVPS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVPS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && 
isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VDIVPS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVPS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VDIVPS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VDIVPS") + } + return p +} + +// VDIVSD performs "Divide Scalar Double-Precision Floating-Point Values". 
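+//
+// In the {er} form the rounding mode is folded into the EVEX prefix instead
+// of taking an extra byte: the 0x10 constant in the fourth prefix byte sets
+// EVEX.b, and vcode(v[0]) << 5 places the rounding control in the bits that
+// otherwise carry the L'L vector length (00b = nearest, 01b = down,
+// 10b = up, 11b = toward zero, each implying suppress-all-exceptions).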
+// +// Mnemonic : VDIVSD +// Supported forms : (5 forms) +// +// * VDIVSD xmm, xmm, xmm [AVX] +// * VDIVSD m64, xmm, xmm [AVX] +// * VDIVSD m64, xmm, xmm{k}{z} [AVX512F] +// * VDIVSD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VDIVSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VDIVSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VDIVSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VDIVSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VDIVSD takes 3 or 4 operands") + } + // VDIVSD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVSD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VDIVSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VDIVSD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VDIVSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VDIVSD") + } + return p +} + +// VDIVSS performs "Divide Scalar Single-Precision Floating-Point Values". 
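+//
+// A minimal usage sketch; DefaultArch, CreateProgram and the XMM register
+// constants are assumed to be the ones exported elsewhere in this package,
+// shown here for illustration only:
+//
+//     p := DefaultArch.CreateProgram()    // assumed constructor
+//     p.VDIVSS(XMM2, XMM1, XMM0)          // xmm0 = xmm1 / xmm2
+//     p.RET()
+//
+// Note the source-first operand order used throughout this file: the last
+// operand is the destination, matching the "Supported forms" lists.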
+// +// Mnemonic : VDIVSS +// Supported forms : (5 forms) +// +// * VDIVSS xmm, xmm, xmm [AVX] +// * VDIVSS m32, xmm, xmm [AVX] +// * VDIVSS m32, xmm, xmm{k}{z} [AVX512F] +// * VDIVSS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VDIVSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VDIVSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VDIVSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VDIVSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VDIVSS takes 3 or 4 operands") + } + // VDIVSS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVSS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VDIVSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VDIVSS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VDIVSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VDIVSS") + } + return p +} + +// VDPPD performs "Dot Product of Packed Double Precision Floating-Point Values". 
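+//
+// The imm8 control behaves as in SSE4.1 DPPD: bits 5:4 select which of the
+// two double-precision products enter the sum, and bits 1:0 select the
+// destination lanes that receive it (deselected lanes are zeroed). A full
+// dot product stored to the low lane therefore uses imm8 = 0x31, e.g.
+// p.VDPPD(0x31, XMM2, XMM1, XMM0), assuming this package's XMM constants.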
+// +// Mnemonic : VDPPD +// Supported forms : (2 forms) +// +// * VDPPD imm8, xmm, xmm, xmm [AVX] +// * VDPPD imm8, m128, xmm, xmm [AVX] +// +func (self *Program) VDPPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VDPPD", 4, Operands { v0, v1, v2, v3 }) + // VDPPD imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x41) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VDPPD imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x41) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VDPPD") + } + return p +} + +// VDPPS performs "Dot Product of Packed Single Precision Floating-Point Values". +// +// Mnemonic : VDPPS +// Supported forms : (4 forms) +// +// * VDPPS imm8, xmm, xmm, xmm [AVX] +// * VDPPS imm8, m128, xmm, xmm [AVX] +// * VDPPS imm8, ymm, ymm, ymm [AVX] +// * VDPPS imm8, m256, ymm, ymm [AVX] +// +func (self *Program) VDPPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VDPPS", 4, Operands { v0, v1, v2, v3 }) + // VDPPS imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x40) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VDPPS imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x40) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VDPPS imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x40) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VDPPS imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x40) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VDPPS") + } + return p +} + +// VEXP2PD performs "Approximation to the Exponential 2^x of Packed Double-Precision Floating-Point Values with Less Than 2^-23 Relative Error". 
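+//
+// As elsewhere in this file, the optional {sae} modifier is passed as the
+// first operand and shifts the remaining operands right by one; the method
+// therefore takes a trailing variadic parameter and dispatches on len(vv)
+// and isSAE(v0).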
+// +// Mnemonic : VEXP2PD +// Supported forms : (3 forms) +// +// * VEXP2PD m512/m64bcst, zmm{k}{z} [AVX512ER] +// * VEXP2PD {sae}, zmm, zmm{k}{z} [AVX512ER] +// * VEXP2PD zmm, zmm{k}{z} [AVX512ER] +// +func (self *Program) VEXP2PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VEXP2PD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VEXP2PD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VEXP2PD takes 2 or 3 operands") + } + // VEXP2PD m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xc8) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VEXP2PD {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0xc8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VEXP2PD zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xc8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXP2PD") + } + return p +} + +// VEXP2PS performs "Approximation to the Exponential 2^x of Packed Single-Precision Floating-Point Values with Less Than 2^-23 Relative Error". 
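+//
+// The last argument to m.mrsd() is the EVEX compressed-displacement scale
+// (disp8*N): in the m512/m32bcst form a one-byte displacement is implicitly
+// multiplied by 64, the full vector width, so offsets up to roughly +/-8KB
+// in 64-byte steps still encode in a single byte.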
+// +// Mnemonic : VEXP2PS +// Supported forms : (3 forms) +// +// * VEXP2PS m512/m32bcst, zmm{k}{z} [AVX512ER] +// * VEXP2PS {sae}, zmm, zmm{k}{z} [AVX512ER] +// * VEXP2PS zmm, zmm{k}{z} [AVX512ER] +// +func (self *Program) VEXP2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VEXP2PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VEXP2PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VEXP2PS takes 2 or 3 operands") + } + // VEXP2PS m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xc8) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VEXP2PS {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0xc8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VEXP2PS zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xc8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXP2PS") + } + return p +} + +// VEXPANDPD performs "Load Sparse Packed Double-Precision Floating-Point Values from Dense Memory". 
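+//
+// Expand loads follow the Tuple1-Scalar disp8*N rule, so every memory form
+// below passes the element size 8 to m.mrsd() regardless of vector length,
+// unlike full-vector loads which scale by 16, 32 or 64.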
+// +// Mnemonic : VEXPANDPD +// Supported forms : (6 forms) +// +// * VEXPANDPD zmm, zmm{k}{z} [AVX512F] +// * VEXPANDPD m512, zmm{k}{z} [AVX512F] +// * VEXPANDPD xmm, xmm{k}{z} [AVX512VL] +// * VEXPANDPD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VEXPANDPD m128, xmm{k}{z} [AVX512VL] +// * VEXPANDPD m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VEXPANDPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VEXPANDPD", 2, Operands { v0, v1 }) + // VEXPANDPD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x88) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VEXPANDPD m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x88) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VEXPANDPD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x88) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VEXPANDPD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x88) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VEXPANDPD m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x88) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VEXPANDPD m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x88) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VEXPANDPD") + } + return p +} + +// VEXPANDPS performs "Load Sparse Packed Single-Precision Floating-Point Values from Dense Memory". 
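+//
+// The {k}{z} decorations on the destination map directly into the EVEX
+// prefix: kcode() supplies the opmask register number in the aaa field
+// (bits 2:0 of the last prefix byte) and zcode() the zeroing/merging bit
+// (bit 7), so an undecorated register simply encodes k0 with merging
+// semantics.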
+// +// Mnemonic : VEXPANDPS +// Supported forms : (6 forms) +// +// * VEXPANDPS zmm, zmm{k}{z} [AVX512F] +// * VEXPANDPS m512, zmm{k}{z} [AVX512F] +// * VEXPANDPS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VEXPANDPS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VEXPANDPS m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VEXPANDPS m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VEXPANDPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VEXPANDPS", 2, Operands { v0, v1 }) + // VEXPANDPS zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x88) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VEXPANDPS m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x88) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VEXPANDPS xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x88) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VEXPANDPS ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x88) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VEXPANDPS m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x88) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VEXPANDPS m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x88) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VEXPANDPS") + } + return p +} + +// VEXTRACTF128 performs "Extract Packed Floating-Point Values". 
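+//
+// Only bit 0 of the immediate is significant: 0 extracts the low 128-bit
+// lane and 1 the high lane, e.g. p.VEXTRACTF128(1, YMM1, XMM0) copies the
+// upper half of ymm1 into xmm0 (register names assume this package's
+// constants).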
+// +// Mnemonic : VEXTRACTF128 +// Supported forms : (2 forms) +// +// * VEXTRACTF128 imm8, ymm, xmm [AVX] +// * VEXTRACTF128 imm8, ymm, m128 [AVX] +// +func (self *Program) VEXTRACTF128(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTF128", 3, Operands { v0, v1, v2 }) + // VEXTRACTF128 imm8, ymm, xmm + if isImm8(v0) && isYMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x7d) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF128 imm8, ymm, m128 + if isImm8(v0) && isYMM(v1) && isM128(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[1]), addr(v[2]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTF128") + } + return p +} + +// VEXTRACTF32X4 performs "Extract 128 Bits of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VEXTRACTF32X4 +// Supported forms : (4 forms) +// +// * VEXTRACTF32X4 imm8, zmm, xmm{k}{z} [AVX512F] +// * VEXTRACTF32X4 imm8, zmm, m128{k}{z} [AVX512F] +// * VEXTRACTF32X4 imm8, ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VEXTRACTF32X4 imm8, ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VEXTRACTF32X4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTF32X4", 3, Operands { v0, v1, v2 }) + // VEXTRACTF32X4 imm8, zmm, xmm{k}{z} + if isImm8(v0) && isZMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF32X4 imm8, zmm, m128{k}{z} + if isImm8(v0) && isZMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF32X4 imm8, ymm, xmm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF32X4 imm8, ymm, m128{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTF32X4") + } + return p +} + +// VEXTRACTF32X8 performs "Extract 256 Bits of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VEXTRACTF32X8 +// Supported forms : (2 forms) +// +// * VEXTRACTF32X8 imm8, zmm, ymm{k}{z} [AVX512DQ] +// * VEXTRACTF32X8 imm8, zmm, m256{k}{z} [AVX512DQ] +// +func (self *Program) VEXTRACTF32X8(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTF32X8", 3, Operands { v0, v1, v2 }) + // VEXTRACTF32X8 imm8, zmm, ymm{k}{z} + if isImm8(v0) && isZMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x1b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF32X8 imm8, zmm, m256{k}{z} + if isImm8(v0) && isZMM(v1) && isM256kz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x1b) + m.mrsd(lcode(v[1]), addr(v[2]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTF32X8") + } + return p +} + +// VEXTRACTF64X2 performs "Extract 128 Bits of Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VEXTRACTF64X2 +// Supported forms : (4 forms) +// +// * VEXTRACTF64X2 imm8, zmm, xmm{k}{z} [AVX512DQ] +// * VEXTRACTF64X2 imm8, zmm, m128{k}{z} [AVX512DQ] +// * VEXTRACTF64X2 imm8, ymm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VEXTRACTF64X2 imm8, ymm, m128{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VEXTRACTF64X2(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTF64X2", 3, Operands { v0, v1, v2 }) + // VEXTRACTF64X2 imm8, zmm, xmm{k}{z} + if isImm8(v0) && isZMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF64X2 imm8, zmm, m128{k}{z} + if isImm8(v0) && isZMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF64X2 imm8, ymm, xmm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF64X2 imm8, ymm, m128{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTF64X2") + } + return p +} + +// VEXTRACTF64X4 performs 
"Extract 256 Bits of Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VEXTRACTF64X4 +// Supported forms : (2 forms) +// +// * VEXTRACTF64X4 imm8, zmm, ymm{k}{z} [AVX512F] +// * VEXTRACTF64X4 imm8, zmm, m256{k}{z} [AVX512F] +// +func (self *Program) VEXTRACTF64X4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTF64X4", 3, Operands { v0, v1, v2 }) + // VEXTRACTF64X4 imm8, zmm, ymm{k}{z} + if isImm8(v0) && isZMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x1b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF64X4 imm8, zmm, m256{k}{z} + if isImm8(v0) && isZMM(v1) && isM256kz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x1b) + m.mrsd(lcode(v[1]), addr(v[2]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTF64X4") + } + return p +} + +// VEXTRACTI128 performs "Extract Packed Integer Values". +// +// Mnemonic : VEXTRACTI128 +// Supported forms : (2 forms) +// +// * VEXTRACTI128 imm8, ymm, xmm [AVX2] +// * VEXTRACTI128 imm8, ymm, m128 [AVX2] +// +func (self *Program) VEXTRACTI128(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTI128", 3, Operands { v0, v1, v2 }) + // VEXTRACTI128 imm8, ymm, xmm + if isImm8(v0) && isYMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x7d) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI128 imm8, ymm, m128 + if isImm8(v0) && isYMM(v1) && isM128(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[1]), addr(v[2]), 0) + m.emit(0x39) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTI128") + } + return p +} + +// VEXTRACTI32X4 performs "Extract 128 Bits of Packed Doubleword Integer Values". 
+// +// Mnemonic : VEXTRACTI32X4 +// Supported forms : (4 forms) +// +// * VEXTRACTI32X4 imm8, zmm, xmm{k}{z} [AVX512F] +// * VEXTRACTI32X4 imm8, zmm, m128{k}{z} [AVX512F] +// * VEXTRACTI32X4 imm8, ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VEXTRACTI32X4 imm8, ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VEXTRACTI32X4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTI32X4", 3, Operands { v0, v1, v2 }) + // VEXTRACTI32X4 imm8, zmm, xmm{k}{z} + if isImm8(v0) && isZMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI32X4 imm8, zmm, m128{k}{z} + if isImm8(v0) && isZMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x39) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI32X4 imm8, ymm, xmm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI32X4 imm8, ymm, m128{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x39) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTI32X4") + } + return p +} + +// VEXTRACTI32X8 performs "Extract 256 Bits of Packed Doubleword Integer Values". 
+// +// Mnemonic : VEXTRACTI32X8 +// Supported forms : (2 forms) +// +// * VEXTRACTI32X8 imm8, zmm, ymm{k}{z} [AVX512DQ] +// * VEXTRACTI32X8 imm8, zmm, m256{k}{z} [AVX512DQ] +// +func (self *Program) VEXTRACTI32X8(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTI32X8", 3, Operands { v0, v1, v2 }) + // VEXTRACTI32X8 imm8, zmm, ymm{k}{z} + if isImm8(v0) && isZMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI32X8 imm8, zmm, m256{k}{z} + if isImm8(v0) && isZMM(v1) && isM256kz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3b) + m.mrsd(lcode(v[1]), addr(v[2]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTI32X8") + } + return p +} + +// VEXTRACTI64X2 performs "Extract 128 Bits of Packed Quadword Integer Values". +// +// Mnemonic : VEXTRACTI64X2 +// Supported forms : (4 forms) +// +// * VEXTRACTI64X2 imm8, zmm, xmm{k}{z} [AVX512DQ] +// * VEXTRACTI64X2 imm8, zmm, m128{k}{z} [AVX512DQ] +// * VEXTRACTI64X2 imm8, ymm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VEXTRACTI64X2 imm8, ymm, m128{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VEXTRACTI64X2(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTI64X2", 3, Operands { v0, v1, v2 }) + // VEXTRACTI64X2 imm8, zmm, xmm{k}{z} + if isImm8(v0) && isZMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI64X2 imm8, zmm, m128{k}{z} + if isImm8(v0) && isZMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x39) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI64X2 imm8, ymm, xmm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI64X2 imm8, ymm, m128{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x39) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTI64X2") + } + return p +} + +// VEXTRACTI64X4 performs "Extract 256 Bits 
of Packed Quadword Integer Values". +// +// Mnemonic : VEXTRACTI64X4 +// Supported forms : (2 forms) +// +// * VEXTRACTI64X4 imm8, zmm, ymm{k}{z} [AVX512F] +// * VEXTRACTI64X4 imm8, zmm, m256{k}{z} [AVX512F] +// +func (self *Program) VEXTRACTI64X4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTI64X4", 3, Operands { v0, v1, v2 }) + // VEXTRACTI64X4 imm8, zmm, ymm{k}{z} + if isImm8(v0) && isZMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI64X4 imm8, zmm, m256{k}{z} + if isImm8(v0) && isZMM(v1) && isM256kz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3b) + m.mrsd(lcode(v[1]), addr(v[2]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTI64X4") + } + return p +} + +// VEXTRACTPS performs "Extract Packed Single Precision Floating-Point Value". +// +// Mnemonic : VEXTRACTPS +// Supported forms : (4 forms) +// +// * VEXTRACTPS imm8, xmm, r32 [AVX] +// * VEXTRACTPS imm8, xmm, m32 [AVX] +// * VEXTRACTPS imm8, xmm, r32 [AVX512F] +// * VEXTRACTPS imm8, xmm, m32 [AVX512F] +// +func (self *Program) VEXTRACTPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTPS", 3, Operands { v0, v1, v2 }) + // VEXTRACTPS imm8, xmm, r32 + if isImm8(v0) && isXMM(v1) && isReg32(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x79) + m.emit(0x17) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTPS imm8, xmm, m32 + if isImm8(v0) && isXMM(v1) && isM32(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0) + m.emit(0x17) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTPS imm8, xmm, r32 + if isImm8(v0) && isEVEXXMM(v1) && isReg32(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit(0x08) + m.emit(0x17) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTPS imm8, xmm, m32 + if isImm8(v0) && isEVEXXMM(v1) && isM32(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0) + m.emit(0x17) + m.mrsd(lcode(v[1]), addr(v[2]), 4) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTPS") + } + return p +} + +// VFIXUPIMMPD performs "Fix Up Special Packed Double-Precision Floating-Point Values". 
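+//
+// Usage sketch (illustrative; `p` is an assumed *Program from this package):
+// the trailing variadic parameter exists so that the optional {sae} form can
+// take five operands. The imm8 value 0x21 below is an arbitrary example of
+// the fixup control byte; the plain four-operand register form is
+//
+//     p.VFIXUPIMMPD(0x21, ZMM2, ZMM1, ZMM0)    // imm8, zmm, zmm, zmm{k}{z}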
+// +// Mnemonic : VFIXUPIMMPD +// Supported forms : (7 forms) +// +// * VFIXUPIMMPD imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFIXUPIMMPD imm8, {sae}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFIXUPIMMPD imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFIXUPIMMPD imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFIXUPIMMPD imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFIXUPIMMPD imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFIXUPIMMPD imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFIXUPIMMPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFIXUPIMMPD", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VFIXUPIMMPD", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VFIXUPIMMPD takes 4 or 5 operands") + } + // VFIXUPIMMPD imm8, m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x54) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMPD imm8, {sae}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0xfd ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x54) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMPD imm8, zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x54) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMPD imm8, m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x54) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMPD imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x54) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMPD imm8, m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) 
{
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+            m.emit(0x54)
+            m.mrsd(lcode(v[3]), addr(v[1]), 32)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VFIXUPIMMPD imm8, ymm, ymm, ymm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+            m.emit(0x54)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VFIXUPIMMPD")
+    }
+    return p
+}
+
+// VFIXUPIMMPS performs "Fix Up Special Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic        : VFIXUPIMMPS
+// Supported forms : (7 forms)
+//
+//     * VFIXUPIMMPS imm8, m512/m32bcst, zmm, zmm{k}{z}    [AVX512F]
+//     * VFIXUPIMMPS imm8, {sae}, zmm, zmm, zmm{k}{z}      [AVX512F]
+//     * VFIXUPIMMPS imm8, zmm, zmm, zmm{k}{z}             [AVX512F]
+//     * VFIXUPIMMPS imm8, m128/m32bcst, xmm, xmm{k}{z}    [AVX512F,AVX512VL]
+//     * VFIXUPIMMPS imm8, xmm, xmm, xmm{k}{z}             [AVX512F,AVX512VL]
+//     * VFIXUPIMMPS imm8, m256/m32bcst, ymm, ymm{k}{z}    [AVX512F,AVX512VL]
+//     * VFIXUPIMMPS imm8, ymm, ymm, ymm{k}{z}             [AVX512F,AVX512VL]
+//
+func (self *Program) VFIXUPIMMPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+    var p *Instruction
+    switch len(vv) {
+        case 0  : p = self.alloc("VFIXUPIMMPS", 4, Operands { v0, v1, v2, v3 })
+        case 1  : p = self.alloc("VFIXUPIMMPS", 5, Operands { v0, v1, v2, v3, vv[0] })
+        default : panic("instruction VFIXUPIMMPS takes 4 or 5 operands")
+    }
+    // VFIXUPIMMPS imm8, m512/m32bcst, zmm, zmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+            m.emit(0x54)
+            m.mrsd(lcode(v[3]), addr(v[1]), 64)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VFIXUPIMMPS imm8, {sae}, zmm, zmm, zmm{k}{z}
+    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isZMMkz(vv[0]) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[3]) << 3))
+            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+            m.emit(0x54)
+            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VFIXUPIMMPS imm8, zmm, zmm, zmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+            m.emit(0x54)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VFIXUPIMMPS imm8, m128/m32bcst, xmm, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) &&
isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+            m.emit(0x54)
+            m.mrsd(lcode(v[3]), addr(v[1]), 16)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VFIXUPIMMPS imm8, xmm, xmm, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+            m.emit(0x54)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VFIXUPIMMPS imm8, m256/m32bcst, ymm, ymm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+            m.emit(0x54)
+            m.mrsd(lcode(v[3]), addr(v[1]), 32)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VFIXUPIMMPS imm8, ymm, ymm, ymm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+            m.emit(0x54)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VFIXUPIMMPS")
+    }
+    return p
+}
+
+// VFIXUPIMMSD performs "Fix Up Special Scalar Double-Precision Floating-Point Value".
+// +// Mnemonic : VFIXUPIMMSD +// Supported forms : (3 forms) +// +// * VFIXUPIMMSD imm8, m64, xmm, xmm{k}{z} [AVX512F] +// * VFIXUPIMMSD imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFIXUPIMMSD imm8, xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFIXUPIMMSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFIXUPIMMSD", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VFIXUPIMMSD", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VFIXUPIMMSD takes 4 or 5 operands") + } + // VFIXUPIMMSD imm8, m64, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x55) + m.mrsd(lcode(v[3]), addr(v[1]), 8) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMSD imm8, {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0xfd ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x55) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMSD imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x55) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFIXUPIMMSD") + } + return p +} + +// VFIXUPIMMSS performs "Fix Up Special Scalar Single-Precision Floating-Point Value". 
+// +// Mnemonic : VFIXUPIMMSS +// Supported forms : (3 forms) +// +// * VFIXUPIMMSS imm8, m32, xmm, xmm{k}{z} [AVX512F] +// * VFIXUPIMMSS imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFIXUPIMMSS imm8, xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFIXUPIMMSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFIXUPIMMSS", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VFIXUPIMMSS", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VFIXUPIMMSS takes 4 or 5 operands") + } + // VFIXUPIMMSS imm8, m32, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x55) + m.mrsd(lcode(v[3]), addr(v[1]), 4) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMSS imm8, {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0x7d ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x55) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMSS imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x55) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFIXUPIMMSS") + } + return p +} + +// VFMADD132PD performs "Fused Multiply-Add of Packed Double-Precision Floating-Point Values". 
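+//
+// Usage sketch (illustrative; `p` is an assumed *Program from this package):
+// operands are listed source-first, so the FMA3 register form below computes
+// XMM0 = XMM0 * XMM2 + XMM1. The trailing variadic slot is only used by the
+// four-operand {er} embedded-rounding form.
+//
+//     p.VFMADD132PD(XMM2, XMM1, XMM0)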
+// +// Mnemonic : VFMADD132PD +// Supported forms : (11 forms) +// +// * VFMADD132PD xmm, xmm, xmm [FMA3] +// * VFMADD132PD m128, xmm, xmm [FMA3] +// * VFMADD132PD ymm, ymm, ymm [FMA3] +// * VFMADD132PD m256, ymm, ymm [FMA3] +// * VFMADD132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADD132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD132PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADD132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADD132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD132PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD132PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD132PD takes 3 or 4 operands") + } + // VFMADD132PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD132PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD132PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADD132PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x98) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD132PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADD132PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADD132PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD132PD") + } + return p +} + +// VFMADD132PS performs "Fused Multiply-Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADD132PS +// Supported forms : (11 forms) +// +// * VFMADD132PS xmm, xmm, xmm [FMA3] +// * VFMADD132PS m128, xmm, xmm [FMA3] +// * VFMADD132PS ymm, ymm, ymm [FMA3] +// * VFMADD132PS m256, ymm, ymm [FMA3] +// * VFMADD132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADD132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD132PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADD132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADD132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD132PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD132PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD132PS takes 3 or 4 operands") + } + // VFMADD132PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD132PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD132PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADD132PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x98) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD132PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADD132PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADD132PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD132PS") + } + return p +} + +// VFMADD132SD performs "Fused Multiply-Add of Scalar Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADD132SD +// Supported forms : (5 forms) +// +// * VFMADD132SD xmm, xmm, xmm [FMA3] +// * VFMADD132SD m64, xmm, xmm [FMA3] +// * VFMADD132SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFMADD132SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMADD132SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMADD132SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD132SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD132SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD132SD takes 3 or 4 operands") + } + // VFMADD132SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x99) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x99) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD132SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x99) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFMADD132SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x99) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD132SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x99) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD132SD") + } + return p +} + +// VFMADD132SS performs "Fused Multiply-Add of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADD132SS +// Supported forms : (5 forms) +// +// * VFMADD132SS xmm, xmm, xmm [FMA3] +// * VFMADD132SS m32, xmm, xmm [FMA3] +// * VFMADD132SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFMADD132SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMADD132SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMADD132SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD132SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD132SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD132SS takes 3 or 4 operands") + } + // VFMADD132SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x99) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x99) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD132SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x99) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFMADD132SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x99) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD132SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x99) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD132SS") + } + return p +} + +// VFMADD213PD performs "Fused Multiply-Add of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADD213PD +// Supported forms : (11 forms) +// +// * VFMADD213PD xmm, xmm, xmm [FMA3] +// * VFMADD213PD m128, xmm, xmm [FMA3] +// * VFMADD213PD ymm, ymm, ymm [FMA3] +// * VFMADD213PD m256, ymm, ymm [FMA3] +// * VFMADD213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADD213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD213PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADD213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADD213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD213PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD213PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD213PD takes 3 or 4 operands") + } + // VFMADD213PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD213PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD213PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADD213PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD213PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADD213PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADD213PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD213PD") + } + return p +} + +// VFMADD213PS performs "Fused Multiply-Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADD213PS +// Supported forms : (11 forms) +// +// * VFMADD213PS xmm, xmm, xmm [FMA3] +// * VFMADD213PS m128, xmm, xmm [FMA3] +// * VFMADD213PS ymm, ymm, ymm [FMA3] +// * VFMADD213PS m256, ymm, ymm [FMA3] +// * VFMADD213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADD213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD213PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADD213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADD213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD213PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD213PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD213PS takes 3 or 4 operands") + } + // VFMADD213PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD213PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD213PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADD213PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD213PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADD213PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADD213PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD213PS") + } + return p +} + +// VFMADD213SD performs "Fused Multiply-Add of Scalar Double-Precision Floating-Point Values". 
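+//
+// Illustrative usage sketch (added commentary, not from the upstream source;
+// assumes a *Program p and the XMM register constants from this package):
+//
+//     // AT&T operand order; roughly xmm1 = xmm2*xmm1 + xmm3
+//     p.VFMADD213SD(XMM3, XMM2, XMM1)    // FMA3 "xmm, xmm, xmm" form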
+// +// Mnemonic : VFMADD213SD +// Supported forms : (5 forms) +// +// * VFMADD213SD xmm, xmm, xmm [FMA3] +// * VFMADD213SD m64, xmm, xmm [FMA3] +// * VFMADD213SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFMADD213SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMADD213SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMADD213SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD213SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD213SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD213SD takes 3 or 4 operands") + } + // VFMADD213SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xa9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD213SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xa9) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFMADD213SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xa9) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD213SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xa9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD213SD") + } + return p +} + +// VFMADD213SS performs "Fused Multiply-Add of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADD213SS +// Supported forms : (5 forms) +// +// * VFMADD213SS xmm, xmm, xmm [FMA3] +// * VFMADD213SS m32, xmm, xmm [FMA3] +// * VFMADD213SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFMADD213SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMADD213SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMADD213SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD213SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD213SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD213SS takes 3 or 4 operands") + } + // VFMADD213SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xa9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD213SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xa9) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFMADD213SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xa9) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD213SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xa9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD213SS") + } + return p +} + +// VFMADD231PD performs "Fused Multiply-Add of Packed Double-Precision Floating-Point Values". 
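+//
+// Illustrative usage sketch (added commentary; p and the register constants
+// are assumed to come from this package):
+//
+//     // AT&T operand order; roughly ymm1 = ymm2*ymm3 + ymm1
+//     p.VFMADD231PD(YMM3, YMM2, YMM1)    // FMA3 "ymm, ymm, ymm" form
+//     p.VFMADD231PD(ZMM3, ZMM2, ZMM1)    // EVEX "zmm, zmm, zmm{k}{z}" form, unmasked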
+// +// Mnemonic : VFMADD231PD +// Supported forms : (11 forms) +// +// * VFMADD231PD xmm, xmm, xmm [FMA3] +// * VFMADD231PD m128, xmm, xmm [FMA3] +// * VFMADD231PD ymm, ymm, ymm [FMA3] +// * VFMADD231PD m256, ymm, ymm [FMA3] +// * VFMADD231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADD231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD231PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADD231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADD231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD231PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD231PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD231PD takes 3 or 4 operands") + } + // VFMADD231PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD231PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD231PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADD231PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD231PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADD231PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADD231PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD231PD") + } + return p +} + +// VFMADD231PS performs "Fused Multiply-Add of Packed Single-Precision Floating-Point Values". 
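+//
+// Note (added commentary): the optional trailing operand selects the EVEX
+// "{er}" form, so a four-operand call must pass one of this package's
+// embedded-rounding operands first (constant names not shown here); with
+// three operands the matchers below pick the plain FMA3 or EVEX encodings.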
+// +// Mnemonic : VFMADD231PS +// Supported forms : (11 forms) +// +// * VFMADD231PS xmm, xmm, xmm [FMA3] +// * VFMADD231PS m128, xmm, xmm [FMA3] +// * VFMADD231PS ymm, ymm, ymm [FMA3] +// * VFMADD231PS m256, ymm, ymm [FMA3] +// * VFMADD231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADD231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD231PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADD231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADD231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD231PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD231PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD231PS takes 3 or 4 operands") + } + // VFMADD231PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD231PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD231PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADD231PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD231PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADD231PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADD231PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD231PS") + } + return p +} + +// VFMADD231SD performs "Fused Multiply-Add of Scalar Double-Precision Floating-Point Values". 
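+//
+// Note (added commentary): a 64-bit memory source selects the "m64, xmm, xmm"
+// forms, e.g. p.VFMADD231SD(mem, XMM2, XMM1) with mem built from this
+// package's addressing helpers (construction elided here); register-only
+// calls fall through to the VEX- or EVEX-encoded register forms below.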
+// +// Mnemonic : VFMADD231SD +// Supported forms : (5 forms) +// +// * VFMADD231SD xmm, xmm, xmm [FMA3] +// * VFMADD231SD m64, xmm, xmm [FMA3] +// * VFMADD231SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFMADD231SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMADD231SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMADD231SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD231SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD231SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD231SD takes 3 or 4 operands") + } + // VFMADD231SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xb9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD231SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xb9) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFMADD231SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xb9) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD231SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD231SD") + } + return p +} + +// VFMADD231SS performs "Fused Multiply-Add of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADD231SS +// Supported forms : (5 forms) +// +// * VFMADD231SS xmm, xmm, xmm [FMA3] +// * VFMADD231SS m32, xmm, xmm [FMA3] +// * VFMADD231SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFMADD231SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMADD231SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMADD231SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD231SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD231SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD231SS takes 3 or 4 operands") + } + // VFMADD231SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xb9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD231SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xb9) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFMADD231SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xb9) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD231SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD231SS") + } + return p +} + +// VFMADDPD performs "Fused Multiply-Add of Packed Double-Precision Floating-Point Values". 
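+//
+// Illustrative usage sketch (added commentary; register constants assumed
+// from this package):
+//
+//     // FMA4 takes an explicit destination; roughly xmm0 = xmm1*xmm2 + xmm3
+//     p.VFMADDPD(XMM3, XMM2, XMM1, XMM0) // "xmm, xmm, xmm, xmm" form, requires FMA4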
+// +// Mnemonic : VFMADDPD +// Supported forms : (6 forms) +// +// * VFMADDPD xmm, xmm, xmm, xmm [FMA4] +// * VFMADDPD m128, xmm, xmm, xmm [FMA4] +// * VFMADDPD xmm, m128, xmm, xmm [FMA4] +// * VFMADDPD ymm, ymm, ymm, ymm [FMA4] +// * VFMADDPD m256, ymm, ymm, ymm [FMA4] +// * VFMADDPD ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFMADDPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMADDPD", 4, Operands { v0, v1, v2, v3 }) + // VFMADDPD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x69) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x69) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDPD m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x69) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDPD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x69) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDPD ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x69) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x69) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDPD m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x69) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDPD ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x69) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDPD") + } + return p +} + +// VFMADDPS performs "Fused Multiply-Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADDPS +// Supported forms : (6 forms) +// +// * VFMADDPS xmm, xmm, xmm, xmm [FMA4] +// * VFMADDPS m128, xmm, xmm, xmm [FMA4] +// * VFMADDPS xmm, m128, xmm, xmm [FMA4] +// * VFMADDPS ymm, ymm, ymm, ymm [FMA4] +// * VFMADDPS m256, ymm, ymm, ymm [FMA4] +// * VFMADDPS ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFMADDPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMADDPS", 4, Operands { v0, v1, v2, v3 }) + // VFMADDPS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x68) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x68) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDPS m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x68) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDPS xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x68) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDPS ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x68) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x68) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDPS m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x68) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDPS ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x68) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDPS") + } + return p +} + +// VFMADDSD performs "Fused Multiply-Add of Scalar Double-Precision Floating-Point Values". 
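+//
+// Note (added commentary): the register-only FMA4 form below registers two
+// equivalent encoding candidates for the same operands, and a 64-bit memory
+// operand may sit in either source slot, matching the "m64, xmm, xmm, xmm"
+// and "xmm, m64, xmm, xmm" forms.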
+// +// Mnemonic : VFMADDSD +// Supported forms : (3 forms) +// +// * VFMADDSD xmm, xmm, xmm, xmm [FMA4] +// * VFMADDSD m64, xmm, xmm, xmm [FMA4] +// * VFMADDSD xmm, m64, xmm, xmm [FMA4] +// +func (self *Program) VFMADDSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMADDSD", 4, Operands { v0, v1, v2, v3 }) + // VFMADDSD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDSD m64, xmm, xmm, xmm + if isM64(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x6b) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDSD xmm, m64, xmm, xmm + if isXMM(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x6b) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSD") + } + return p +} + +// VFMADDSS performs "Fused Multiply-Add of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADDSS +// Supported forms : (3 forms) +// +// * VFMADDSS xmm, xmm, xmm, xmm [FMA4] +// * VFMADDSS m32, xmm, xmm, xmm [FMA4] +// * VFMADDSS xmm, m32, xmm, xmm [FMA4] +// +func (self *Program) VFMADDSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMADDSS", 4, Operands { v0, v1, v2, v3 }) + // VFMADDSS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDSS m32, xmm, xmm, xmm + if isM32(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x6a) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDSS xmm, m32, xmm, xmm + if isXMM(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x6a) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSS") + } + return p +} + +// VFMADDSUB132PD performs "Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values". 
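+//
+// Illustrative usage sketch (added commentary; register constants assumed
+// from this package):
+//
+//     // alternating add/subtract across lanes, per the mnemonic; roughly
+//     // xmm1 = xmm1*xmm3 -/+ xmm2, subtracting in even lanes, adding in odd
+//     p.VFMADDSUB132PD(XMM3, XMM2, XMM1) // FMA3 "xmm, xmm, xmm" form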
+// +// Mnemonic : VFMADDSUB132PD +// Supported forms : (11 forms) +// +// * VFMADDSUB132PD xmm, xmm, xmm [FMA3] +// * VFMADDSUB132PD m128, xmm, xmm [FMA3] +// * VFMADDSUB132PD ymm, ymm, ymm [FMA3] +// * VFMADDSUB132PD m256, ymm, ymm [FMA3] +// * VFMADDSUB132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB132PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADDSUB132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADDSUB132PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADDSUB132PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADDSUB132PD takes 3 or 4 operands") + } + // VFMADDSUB132PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB132PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB132PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB132PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB132PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADDSUB132PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x96) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADDSUB132PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB132PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADDSUB132PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB132PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADDSUB132PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSUB132PD") + } + return p +} + +// VFMADDSUB132PS performs "Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADDSUB132PS +// Supported forms : (11 forms) +// +// * VFMADDSUB132PS xmm, xmm, xmm [FMA3] +// * VFMADDSUB132PS m128, xmm, xmm [FMA3] +// * VFMADDSUB132PS ymm, ymm, ymm [FMA3] +// * VFMADDSUB132PS m256, ymm, ymm [FMA3] +// * VFMADDSUB132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB132PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADDSUB132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADDSUB132PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADDSUB132PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADDSUB132PS takes 3 or 4 operands") + } + // VFMADDSUB132PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB132PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB132PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB132PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB132PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADDSUB132PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x96) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADDSUB132PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB132PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADDSUB132PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB132PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADDSUB132PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSUB132PS") + } + return p +} + +// VFMADDSUB213PD performs "Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values". 
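+//
+// Note (added commentary): as with the other FMA methods in this file, three
+// operands select the ordinary forms; a fourth is accepted only for the
+// "{er}" form, where the rounding control comes first and the destination
+// last, and any other arity panics, e.g.
+//
+//     p.VFMADDSUB213PD(ZMM3, ZMM2, ZMM1) // EVEX "zmm, zmm, zmm{k}{z}" form, unmasked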
+// +// Mnemonic : VFMADDSUB213PD +// Supported forms : (11 forms) +// +// * VFMADDSUB213PD xmm, xmm, xmm [FMA3] +// * VFMADDSUB213PD m128, xmm, xmm [FMA3] +// * VFMADDSUB213PD ymm, ymm, ymm [FMA3] +// * VFMADDSUB213PD m256, ymm, ymm [FMA3] +// * VFMADDSUB213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB213PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADDSUB213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADDSUB213PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADDSUB213PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADDSUB213PD takes 3 or 4 operands") + } + // VFMADDSUB213PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB213PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB213PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB213PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB213PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADDSUB213PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADDSUB213PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB213PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADDSUB213PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB213PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADDSUB213PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSUB213PD") + } + return p +} + +// VFMADDSUB213PS performs "Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADDSUB213PS +// Supported forms : (11 forms) +// +// * VFMADDSUB213PS xmm, xmm, xmm [FMA3] +// * VFMADDSUB213PS m128, xmm, xmm [FMA3] +// * VFMADDSUB213PS ymm, ymm, ymm [FMA3] +// * VFMADDSUB213PS m256, ymm, ymm [FMA3] +// * VFMADDSUB213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB213PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADDSUB213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADDSUB213PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADDSUB213PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADDSUB213PS takes 3 or 4 operands") + } + // VFMADDSUB213PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB213PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB213PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB213PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB213PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADDSUB213PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADDSUB213PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB213PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADDSUB213PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB213PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADDSUB213PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSUB213PS") + } + return p +} + +// VFMADDSUB231PD performs "Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values". 
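+//
+// The 132/213/231 suffix is the Intel SDM's convention for which operands are
+// multiplied and which is the addend. Mapped onto this package's source-first
+// argument order (v0, v1, dest), the three variants compute, per lane and with
+// the alternating subtract/add of the ADDSUB family:
+//
+//     VFMADDSUB132PD(v0, v1, dest)   // dest = (dest * v0) -/+ v1
+//     VFMADDSUB213PD(v0, v1, dest)   // dest = (v1 * dest) -/+ v0
+//     VFMADDSUB231PD(v0, v1, dest)   // dest = (v1 * v0)   -/+ dest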
+// +// Mnemonic : VFMADDSUB231PD +// Supported forms : (11 forms) +// +// * VFMADDSUB231PD xmm, xmm, xmm [FMA3] +// * VFMADDSUB231PD m128, xmm, xmm [FMA3] +// * VFMADDSUB231PD ymm, ymm, ymm [FMA3] +// * VFMADDSUB231PD m256, ymm, ymm [FMA3] +// * VFMADDSUB231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB231PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADDSUB231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADDSUB231PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADDSUB231PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADDSUB231PD takes 3 or 4 operands") + } + // VFMADDSUB231PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB231PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB231PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB231PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB231PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADDSUB231PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADDSUB231PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB231PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADDSUB231PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB231PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADDSUB231PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSUB231PD") + } + return p +} + +// VFMADDSUB231PS performs "Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values". 
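+//
+// The first source may also be a memory location. A sketch of the m256 form;
+// the Ptr helper for building a memory operand is an assumption about this
+// package's operand API, while RSI/YMM1/YMM2 are exported register constants:
+//
+//     p.VFMADDSUB231PS(Ptr(RSI, 0), YMM1, YMM2)   // ymm2 = (ymm1 * [rsi]) -/+ ymm2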
+// +// Mnemonic : VFMADDSUB231PS +// Supported forms : (11 forms) +// +// * VFMADDSUB231PS xmm, xmm, xmm [FMA3] +// * VFMADDSUB231PS m128, xmm, xmm [FMA3] +// * VFMADDSUB231PS ymm, ymm, ymm [FMA3] +// * VFMADDSUB231PS m256, ymm, ymm [FMA3] +// * VFMADDSUB231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB231PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADDSUB231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADDSUB231PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADDSUB231PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADDSUB231PS takes 3 or 4 operands") + } + // VFMADDSUB231PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB231PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB231PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB231PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB231PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADDSUB231PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADDSUB231PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB231PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADDSUB231PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB231PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADDSUB231PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSUB231PS") + } + return p +} + +// VFMADDSUBPD performs "Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values". 
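+//
+// Unlike the FMA3 encoders above, the FMA4 form takes four explicit operands
+// and is non-destructive: the destination need not double as a source. With the
+// package's source-first ordering this computes
+//
+//     p.VFMADDSUBPD(XMM0, XMM1, XMM2, XMM3)   // xmm3 = (xmm2 * xmm1) -/+ xmm0
+//
+// FMA4 shipped only on AMD's Bulldozer-family processors, which is why these
+// forms are gated on ISA_FMA4 rather than ISA_FMA3.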
+// +// Mnemonic : VFMADDSUBPD +// Supported forms : (6 forms) +// +// * VFMADDSUBPD xmm, xmm, xmm, xmm [FMA4] +// * VFMADDSUBPD m128, xmm, xmm, xmm [FMA4] +// * VFMADDSUBPD xmm, m128, xmm, xmm [FMA4] +// * VFMADDSUBPD ymm, ymm, ymm, ymm [FMA4] +// * VFMADDSUBPD m256, ymm, ymm, ymm [FMA4] +// * VFMADDSUBPD ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFMADDSUBPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMADDSUBPD", 4, Operands { v0, v1, v2, v3 }) + // VFMADDSUBPD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDSUBPD m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x5d) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDSUBPD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x5d) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDSUBPD ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDSUBPD m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x5d) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDSUBPD ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x5d) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSUBPD") + } + return p +} + +// VFMADDSUBPS performs "Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values". 
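+//
+// Note that the all-register forms below register two alternative encodings:
+// FMA4 lets either of the first two arguments sit in the ModRM.rm slot, with
+// the other carried in the trailing /is4 byte, selected by VEX.W. That freedom
+// is what permits a memory operand in either source position, and the encoder
+// keeps both register-register candidates for instruction selection.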
+// +// Mnemonic : VFMADDSUBPS +// Supported forms : (6 forms) +// +// * VFMADDSUBPS xmm, xmm, xmm, xmm [FMA4] +// * VFMADDSUBPS m128, xmm, xmm, xmm [FMA4] +// * VFMADDSUBPS xmm, m128, xmm, xmm [FMA4] +// * VFMADDSUBPS ymm, ymm, ymm, ymm [FMA4] +// * VFMADDSUBPS m256, ymm, ymm, ymm [FMA4] +// * VFMADDSUBPS ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFMADDSUBPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMADDSUBPS", 4, Operands { v0, v1, v2, v3 }) + // VFMADDSUBPS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDSUBPS m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x5c) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDSUBPS xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x5c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDSUBPS ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDSUBPS m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x5c) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDSUBPS ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x5c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSUBPS") + } + return p +} + +// VFMSUB132PD performs "Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values". 
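+//
+// In the EVEX memory forms below, the last argument to m.mrsd is the
+// compressed-displacement scale (EVEX disp8*N): 16, 32 and 64 for the 128-,
+// 256- and 512-bit variants, versus 1 for the VEX forms. EVEX encodes a short
+// displacement as a multiple of the memory access width, so byte offsets that
+// are not a multiple of N fall back to a full 32-bit displacement.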
+// +// Mnemonic : VFMSUB132PD +// Supported forms : (11 forms) +// +// * VFMSUB132PD xmm, xmm, xmm [FMA3] +// * VFMSUB132PD m128, xmm, xmm [FMA3] +// * VFMSUB132PD ymm, ymm, ymm [FMA3] +// * VFMSUB132PD m256, ymm, ymm [FMA3] +// * VFMSUB132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB132PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUB132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB132PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB132PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB132PD takes 3 or 4 operands") + } + // VFMSUB132PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB132PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB132PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUB132PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB132PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUB132PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUB132PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB132PD") + } + return p +} + +// VFMSUB132PS performs "Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values". 
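+//
+// The {er} form takes the embedded-rounding operand first and requires all
+// register operands. Its rounding control is encoded in the EVEX bits that
+// otherwise carry the vector length (note vcode(v[0]) << 5 combined with the
+// 0x10 bit, which sets EVEX.b below), and it implies suppress-all-exceptions
+// (SAE) semantics.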
+// +// Mnemonic : VFMSUB132PS +// Supported forms : (11 forms) +// +// * VFMSUB132PS xmm, xmm, xmm [FMA3] +// * VFMSUB132PS m128, xmm, xmm [FMA3] +// * VFMSUB132PS ymm, ymm, ymm [FMA3] +// * VFMSUB132PS m256, ymm, ymm [FMA3] +// * VFMSUB132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB132PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUB132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB132PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB132PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB132PS takes 3 or 4 operands") + } + // VFMSUB132PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB132PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB132PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUB132PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB132PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUB132PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUB132PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB132PS") + } + return p +} + +// VFMSUB132SD performs "Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values". 
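+//
+// The scalar forms operate on the low double-precision lane only; the upper
+// lane of the destination passes through unchanged per the Intel SDM. Their
+// m64 EVEX form accordingly uses a disp8*8 compressed displacement (the mrsd
+// scale of 8 below).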
+// +// Mnemonic : VFMSUB132SD +// Supported forms : (5 forms) +// +// * VFMSUB132SD xmm, xmm, xmm [FMA3] +// * VFMSUB132SD m64, xmm, xmm [FMA3] +// * VFMSUB132SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB132SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB132SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMSUB132SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB132SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB132SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB132SD takes 3 or 4 operands") + } + // VFMSUB132SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB132SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x9b) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFMSUB132SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB132SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB132SD") + } + return p +} + +// VFMSUB132SS performs "Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values". 
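+//
+// The single-precision scalar forms differ from their SD counterparts above
+// only in element width, selected by VEX/EVEX.W: compare the 0x79/0x7d (W=0)
+// prefix bytes here with 0xf9/0xfd (W=1) in VFMSUB132SD, and the m32 operand
+// with its disp8*4 scale.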
+// +// Mnemonic : VFMSUB132SS +// Supported forms : (5 forms) +// +// * VFMSUB132SS xmm, xmm, xmm [FMA3] +// * VFMSUB132SS m32, xmm, xmm [FMA3] +// * VFMSUB132SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB132SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB132SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMSUB132SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB132SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB132SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB132SS takes 3 or 4 operands") + } + // VFMSUB132SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB132SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x9b) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFMSUB132SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB132SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB132SS") + } + return p +} + +// VFMSUB213PD performs "Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values". 
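+//
+// As with every EVEX encoder in this file, the 128- and 256-bit variants below
+// require both extensions (self.require(ISA_AVX512VL | ISA_AVX512F)), while the
+// 512-bit and {er} variants need only AVX512F. Each p.add call registers one
+// candidate encoding; if no operand pattern matched, p.len stays zero and the
+// function panics with "invalid operands".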
+// +// Mnemonic : VFMSUB213PD +// Supported forms : (11 forms) +// +// * VFMSUB213PD xmm, xmm, xmm [FMA3] +// * VFMSUB213PD m128, xmm, xmm [FMA3] +// * VFMSUB213PD ymm, ymm, ymm [FMA3] +// * VFMSUB213PD m256, ymm, ymm [FMA3] +// * VFMSUB213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB213PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUB213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB213PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB213PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB213PD takes 3 or 4 operands") + } + // VFMSUB213PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB213PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB213PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUB213PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB213PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUB213PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUB213PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB213PD") + } + return p +} + +// VFMSUB213PS performs "Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUB213PS +// Supported forms : (11 forms) +// +// * VFMSUB213PS xmm, xmm, xmm [FMA3] +// * VFMSUB213PS m128, xmm, xmm [FMA3] +// * VFMSUB213PS ymm, ymm, ymm [FMA3] +// * VFMSUB213PS m256, ymm, ymm [FMA3] +// * VFMSUB213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB213PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUB213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB213PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB213PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB213PS takes 3 or 4 operands") + } + // VFMSUB213PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB213PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB213PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUB213PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB213PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUB213PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUB213PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB213PS") + } + return p +} + +// VFMSUB213SD performs "Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUB213SD +// Supported forms : (5 forms) +// +// * VFMSUB213SD xmm, xmm, xmm [FMA3] +// * VFMSUB213SD m64, xmm, xmm [FMA3] +// * VFMSUB213SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB213SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB213SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMSUB213SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB213SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB213SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB213SD takes 3 or 4 operands") + } + // VFMSUB213SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xab) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xab) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB213SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xab) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFMSUB213SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xab) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB213SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xab) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB213SD") + } + return p +} + +// VFMSUB213SS performs "Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUB213SS +// Supported forms : (5 forms) +// +// * VFMSUB213SS xmm, xmm, xmm [FMA3] +// * VFMSUB213SS m32, xmm, xmm [FMA3] +// * VFMSUB213SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB213SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB213SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMSUB213SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB213SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB213SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB213SS takes 3 or 4 operands") + } + // VFMSUB213SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xab) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xab) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB213SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xab) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFMSUB213SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xab) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB213SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xab) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB213SS") + } + return p +} + +// VFMSUB231PD performs "Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUB231PD +// Supported forms : (11 forms) +// +// * VFMSUB231PD xmm, xmm, xmm [FMA3] +// * VFMSUB231PD m128, xmm, xmm [FMA3] +// * VFMSUB231PD ymm, ymm, ymm [FMA3] +// * VFMSUB231PD m256, ymm, ymm [FMA3] +// * VFMSUB231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB231PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUB231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB231PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB231PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB231PD takes 3 or 4 operands") + } + // VFMSUB231PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB231PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB231PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUB231PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xba) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB231PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUB231PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUB231PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB231PD") + } + return p +} + +// VFMSUB231PS performs "Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values". 
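+//
+// A minimal usage sketch, assuming a *Program p from this package and its
+// exported XMM register operands; operand order is source-first with the
+// destination last (reversed from the Intel manuals):
+//
+//     p.VFMSUB231PS(XMM1, XMM2, XMM3)   // xmm3 = (xmm2 * xmm1) - xmm3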
+// +// Mnemonic : VFMSUB231PS +// Supported forms : (11 forms) +// +// * VFMSUB231PS xmm, xmm, xmm [FMA3] +// * VFMSUB231PS m128, xmm, xmm [FMA3] +// * VFMSUB231PS ymm, ymm, ymm [FMA3] +// * VFMSUB231PS m256, ymm, ymm [FMA3] +// * VFMSUB231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB231PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUB231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB231PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB231PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB231PS takes 3 or 4 operands") + } + // VFMSUB231PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB231PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB231PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUB231PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xba) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB231PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUB231PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUB231PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB231PS") + } + return p +} + +// VFMSUB231SD performs "Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values". 
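+//
+// Usage sketch (assuming a *Program p and this package's XMM operands): the
+// scalar form combines only the low doubles and leaves the upper bits of the
+// destination register unchanged:
+//
+//     p.VFMSUB231SD(XMM1, XMM2, XMM3)   // xmm3[0] = (xmm2[0] * xmm1[0]) - xmm3[0]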
+// +// Mnemonic : VFMSUB231SD +// Supported forms : (5 forms) +// +// * VFMSUB231SD xmm, xmm, xmm [FMA3] +// * VFMSUB231SD m64, xmm, xmm [FMA3] +// * VFMSUB231SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB231SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB231SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMSUB231SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB231SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB231SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB231SD takes 3 or 4 operands") + } + // VFMSUB231SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbb) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB231SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xbb) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFMSUB231SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB231SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB231SD") + } + return p +} + +// VFMSUB231SS performs "Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUB231SS +// Supported forms : (5 forms) +// +// * VFMSUB231SS xmm, xmm, xmm [FMA3] +// * VFMSUB231SS m32, xmm, xmm [FMA3] +// * VFMSUB231SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB231SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB231SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMSUB231SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB231SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB231SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB231SS takes 3 or 4 operands") + } + // VFMSUB231SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbb) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB231SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xbb) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFMSUB231SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB231SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB231SS") + } + return p +} + +// VFMSUBADD132PD performs "Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values". 
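+//
+// Usage sketch (assuming a *Program p and this package's YMM operands). In
+// the 132 form the destination is also the first multiplicand, and the
+// subtract/add alternates per lane (subtract in odd lanes, add in even ones):
+//
+//     p.VFMSUBADD132PD(YMM1, YMM2, YMM3)   // ymm3 = (ymm3 * ymm1) -/+ ymm2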
+// +// Mnemonic : VFMSUBADD132PD +// Supported forms : (11 forms) +// +// * VFMSUBADD132PD xmm, xmm, xmm [FMA3] +// * VFMSUBADD132PD m128, xmm, xmm [FMA3] +// * VFMSUBADD132PD ymm, ymm, ymm [FMA3] +// * VFMSUBADD132PD m256, ymm, ymm [FMA3] +// * VFMSUBADD132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD132PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUBADD132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUBADD132PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUBADD132PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUBADD132PD takes 3 or 4 operands") + } + // VFMSUBADD132PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD132PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD132PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD132PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD132PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUBADD132PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x97) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUBADD132PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD132PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUBADD132PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD132PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUBADD132PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBADD132PD") + } + return p +} + +// VFMSUBADD132PS performs "Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUBADD132PS +// Supported forms : (11 forms) +// +// * VFMSUBADD132PS xmm, xmm, xmm [FMA3] +// * VFMSUBADD132PS m128, xmm, xmm [FMA3] +// * VFMSUBADD132PS ymm, ymm, ymm [FMA3] +// * VFMSUBADD132PS m256, ymm, ymm [FMA3] +// * VFMSUBADD132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD132PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUBADD132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUBADD132PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUBADD132PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUBADD132PS takes 3 or 4 operands") + } + // VFMSUBADD132PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD132PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD132PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD132PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD132PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUBADD132PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x97) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUBADD132PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD132PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUBADD132PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD132PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUBADD132PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBADD132PS") + } + return p +} + +// VFMSUBADD213PD performs "Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values". 
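+//
+// Usage sketch (assuming a *Program p and this package's ZMM operands). In
+// the 213 form the destination supplies the second multiplicand:
+//
+//     p.VFMSUBADD213PD(ZMM1, ZMM2, ZMM3)   // zmm3 = (zmm2 * zmm3) -/+ zmm1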
+// +// Mnemonic : VFMSUBADD213PD +// Supported forms : (11 forms) +// +// * VFMSUBADD213PD xmm, xmm, xmm [FMA3] +// * VFMSUBADD213PD m128, xmm, xmm [FMA3] +// * VFMSUBADD213PD ymm, ymm, ymm [FMA3] +// * VFMSUBADD213PD m256, ymm, ymm [FMA3] +// * VFMSUBADD213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD213PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUBADD213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUBADD213PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUBADD213PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUBADD213PD takes 3 or 4 operands") + } + // VFMSUBADD213PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD213PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD213PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD213PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD213PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUBADD213PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUBADD213PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD213PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUBADD213PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD213PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUBADD213PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBADD213PD") + } + return p +} + +// VFMSUBADD213PS performs "Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUBADD213PS +// Supported forms : (11 forms) +// +// * VFMSUBADD213PS xmm, xmm, xmm [FMA3] +// * VFMSUBADD213PS m128, xmm, xmm [FMA3] +// * VFMSUBADD213PS ymm, ymm, ymm [FMA3] +// * VFMSUBADD213PS m256, ymm, ymm [FMA3] +// * VFMSUBADD213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD213PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUBADD213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUBADD213PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUBADD213PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUBADD213PS takes 3 or 4 operands") + } + // VFMSUBADD213PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD213PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD213PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD213PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD213PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUBADD213PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUBADD213PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD213PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUBADD213PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD213PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUBADD213PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBADD213PS") + } + return p +} + +// VFMSUBADD231PD performs "Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values". 
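+//
+// Usage sketch for the four-operand embedded-rounding form: the extra
+// leading operand selects the rounding mode. RN_SAE below is only a
+// placeholder name; substitute whatever rounding-control operand this
+// package actually exports:
+//
+//     p.VFMSUBADD231PD(RN_SAE, ZMM1, ZMM2, ZMM3)   // {rn-sae} rounding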
+// +// Mnemonic : VFMSUBADD231PD +// Supported forms : (11 forms) +// +// * VFMSUBADD231PD xmm, xmm, xmm [FMA3] +// * VFMSUBADD231PD m128, xmm, xmm [FMA3] +// * VFMSUBADD231PD ymm, ymm, ymm [FMA3] +// * VFMSUBADD231PD m256, ymm, ymm [FMA3] +// * VFMSUBADD231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD231PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUBADD231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUBADD231PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUBADD231PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUBADD231PD takes 3 or 4 operands") + } + // VFMSUBADD231PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD231PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD231PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD231PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD231PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUBADD231PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUBADD231PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD231PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUBADD231PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD231PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUBADD231PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBADD231PD") + } + return p +} + +// VFMSUBADD231PS performs "Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUBADD231PS +// Supported forms : (11 forms) +// +// * VFMSUBADD231PS xmm, xmm, xmm [FMA3] +// * VFMSUBADD231PS m128, xmm, xmm [FMA3] +// * VFMSUBADD231PS ymm, ymm, ymm [FMA3] +// * VFMSUBADD231PS m256, ymm, ymm [FMA3] +// * VFMSUBADD231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD231PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUBADD231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUBADD231PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUBADD231PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUBADD231PS takes 3 or 4 operands") + } + // VFMSUBADD231PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD231PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD231PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD231PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD231PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUBADD231PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUBADD231PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD231PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUBADD231PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD231PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUBADD231PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBADD231PS") + } + return p +} + +// VFMSUBADDPD performs "Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values". 
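+//
+// Usage sketch (assuming a *Program p). FMA4 instructions take four
+// operands, destination last; for the all-register form two interchangeable
+// encodings are generated below and the encoder is free to pick either:
+//
+//     p.VFMSUBADDPD(XMM1, XMM2, XMM3, XMM4)   // xmm4 = (xmm3 * xmm1) -/+ xmm2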
+// +// Mnemonic : VFMSUBADDPD +// Supported forms : (6 forms) +// +// * VFMSUBADDPD xmm, xmm, xmm, xmm [FMA4] +// * VFMSUBADDPD m128, xmm, xmm, xmm [FMA4] +// * VFMSUBADDPD xmm, m128, xmm, xmm [FMA4] +// * VFMSUBADDPD ymm, ymm, ymm, ymm [FMA4] +// * VFMSUBADDPD m256, ymm, ymm, ymm [FMA4] +// * VFMSUBADDPD ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFMSUBADDPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMSUBADDPD", 4, Operands { v0, v1, v2, v3 }) + // VFMSUBADDPD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBADDPD m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x5f) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBADDPD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x5f) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBADDPD ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBADDPD m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x5f) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBADDPD ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x5f) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBADDPD") + } + return p +} + +// VFMSUBADDPS performs "Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUBADDPS +// Supported forms : (6 forms) +// +// * VFMSUBADDPS xmm, xmm, xmm, xmm [FMA4] +// * VFMSUBADDPS m128, xmm, xmm, xmm [FMA4] +// * VFMSUBADDPS xmm, m128, xmm, xmm [FMA4] +// * VFMSUBADDPS ymm, ymm, ymm, ymm [FMA4] +// * VFMSUBADDPS m256, ymm, ymm, ymm [FMA4] +// * VFMSUBADDPS ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFMSUBADDPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMSUBADDPS", 4, Operands { v0, v1, v2, v3 }) + // VFMSUBADDPS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBADDPS m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x5e) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBADDPS xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x5e) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBADDPS ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBADDPS m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x5e) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBADDPS ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x5e) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBADDPS") + } + return p +} + +// VFMSUBPD performs "Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values". 
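+//
+// Usage sketch (assuming a *Program p). As the forms below show, a memory
+// source may appear as either the first or the second operand:
+//
+//     p.VFMSUBPD(YMM1, YMM2, YMM3, YMM4)   // ymm4 = (ymm3 * ymm1) - ymm2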
+// +// Mnemonic : VFMSUBPD +// Supported forms : (6 forms) +// +// * VFMSUBPD xmm, xmm, xmm, xmm [FMA4] +// * VFMSUBPD m128, xmm, xmm, xmm [FMA4] +// * VFMSUBPD xmm, m128, xmm, xmm [FMA4] +// * VFMSUBPD ymm, ymm, ymm, ymm [FMA4] +// * VFMSUBPD m256, ymm, ymm, ymm [FMA4] +// * VFMSUBPD ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFMSUBPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMSUBPD", 4, Operands { v0, v1, v2, v3 }) + // VFMSUBPD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBPD m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x6d) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBPD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x6d) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBPD ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBPD m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x6d) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBPD ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x6d) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBPD") + } + return p +} + +// VFMSUBPS performs "Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUBPS +// Supported forms : (6 forms) +// +// * VFMSUBPS xmm, xmm, xmm, xmm [FMA4] +// * VFMSUBPS m128, xmm, xmm, xmm [FMA4] +// * VFMSUBPS xmm, m128, xmm, xmm [FMA4] +// * VFMSUBPS ymm, ymm, ymm, ymm [FMA4] +// * VFMSUBPS m256, ymm, ymm, ymm [FMA4] +// * VFMSUBPS ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFMSUBPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMSUBPS", 4, Operands { v0, v1, v2, v3 }) + // VFMSUBPS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBPS m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x6c) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBPS xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x6c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBPS ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBPS m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x6c) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBPS ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x6c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBPS") + } + return p +} + +// VFMSUBSD performs "Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values". 
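An end-to-end sketch of driving these generated encoders. CreateArch, CreateProgram and Assemble are assumptions about this vendored copy's public API; adjust the constructors to however your build obtains a *Program:

package main

import (
	"fmt"

	"github.com/cloudwego/iasm/x86_64"
)

func main() {
	arch := x86_64.CreateArch()  // assumed constructor
	prog := arch.CreateProgram() // assumed constructor
	// Full-width fused multiply-subtract, destination (XMM4) last.
	prog.VFMSUBPS(x86_64.XMM1, x86_64.XMM2, x86_64.XMM3, x86_64.XMM4)
	fmt.Printf("%x\n", prog.Assemble(0)) // assumed: machine code for entry PC 0
}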
+// +// Mnemonic : VFMSUBSD +// Supported forms : (3 forms) +// +// * VFMSUBSD xmm, xmm, xmm, xmm [FMA4] +// * VFMSUBSD m64, xmm, xmm, xmm [FMA4] +// * VFMSUBSD xmm, m64, xmm, xmm [FMA4] +// +func (self *Program) VFMSUBSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMSUBSD", 4, Operands { v0, v1, v2, v3 }) + // VFMSUBSD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBSD m64, xmm, xmm, xmm + if isM64(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x6f) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBSD xmm, m64, xmm, xmm + if isXMM(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x6f) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBSD") + } + return p +} + +// VFMSUBSS performs "Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUBSS +// Supported forms : (3 forms) +// +// * VFMSUBSS xmm, xmm, xmm, xmm [FMA4] +// * VFMSUBSS m32, xmm, xmm, xmm [FMA4] +// * VFMSUBSS xmm, m32, xmm, xmm [FMA4] +// +func (self *Program) VFMSUBSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMSUBSS", 4, Operands { v0, v1, v2, v3 }) + // VFMSUBSS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBSS m32, xmm, xmm, xmm + if isM32(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x6e) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBSS xmm, m32, xmm, xmm + if isXMM(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x6e) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBSS") + } + return p +} + +// VFNMADD132PD performs "Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values". 
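For reference, the scalar FMSUB forms above compute only the low element; in AMD's destination-first FMA4 operand order (VFMSUBSS dest, src1, src2, src3):

\[ \mathrm{dst}[31{:}0] \gets \mathrm{src1}[31{:}0]\cdot\mathrm{src2}[31{:}0] - \mathrm{src3}[31{:}0] \]

with the upper lanes of the destination handled as the FMA4 specification prescribes; the m32/m64 forms simply source one multiplicand or the subtrahend from memory.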
+// +// Mnemonic : VFNMADD132PD +// Supported forms : (11 forms) +// +// * VFNMADD132PD xmm, xmm, xmm [FMA3] +// * VFNMADD132PD m128, xmm, xmm [FMA3] +// * VFNMADD132PD ymm, ymm, ymm [FMA3] +// * VFNMADD132PD m256, ymm, ymm [FMA3] +// * VFNMADD132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD132PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMADD132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD132PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD132PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD132PD takes 3 or 4 operands") + } + // VFNMADD132PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD132PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD132PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMADD132PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD132PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMADD132PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMADD132PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD132PD") + } + return p +} + +// VFNMADD132PS performs "Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values". 
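The numeric suffix on the FMA3 forms that begin here encodes which operands feed the multiplier and which is the addend, counted in Intel's destination-first operand order (d, s2, s3); for the VFNMADD family:

\begin{aligned}
\texttt{132}:\quad d &\gets -(d \cdot s_3) + s_2 \\
\texttt{213}:\quad d &\gets -(s_2 \cdot d) + s_3 \\
\texttt{231}:\quad d &\gets -(s_2 \cdot s_3) + d
\end{aligned}

iasm lists operands in the reverse order with the destination last (inferred from the memory-operand-first forms above), so VFNMADD132PD(b, a, d) computes d = -(d*b) + a.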
+// +// Mnemonic : VFNMADD132PS +// Supported forms : (11 forms) +// +// * VFNMADD132PS xmm, xmm, xmm [FMA3] +// * VFNMADD132PS m128, xmm, xmm [FMA3] +// * VFNMADD132PS ymm, ymm, ymm [FMA3] +// * VFNMADD132PS m256, ymm, ymm [FMA3] +// * VFNMADD132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD132PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMADD132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD132PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD132PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD132PS takes 3 or 4 operands") + } + // VFNMADD132PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD132PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD132PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMADD132PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD132PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMADD132PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMADD132PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD132PS") + } + return p +} + +// VFNMADD132SD performs "Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values". 
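A minimal sketch of the plain FMA3 register form, assuming the exported XMM register values and the destination-last operand order noted above:

package examples

import "github.com/cloudwego/iasm/x86_64"

// Per the Intel SDM for VFNMADD132PS: dst = -(dst * src3) + src2, so with
// iasm's reversed operand order this computes XMM9 = -(XMM9 * XMM7) + XMM8.
func emitFnmadd132PS(p *x86_64.Program) {
	p.VFNMADD132PS(x86_64.XMM7, x86_64.XMM8, x86_64.XMM9)
}

Using XMM8/XMM9 here also exercises the hcode high-register bits folded into the VEX prefix bytes above.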
+// +// Mnemonic : VFNMADD132SD +// Supported forms : (5 forms) +// +// * VFNMADD132SD xmm, xmm, xmm [FMA3] +// * VFNMADD132SD m64, xmm, xmm [FMA3] +// * VFNMADD132SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD132SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD132SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMADD132SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD132SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD132SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD132SD takes 3 or 4 operands") + } + // VFNMADD132SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x9d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD132SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x9d) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFNMADD132SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD132SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD132SD") + } + return p +} + +// VFNMADD132SS performs "Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADD132SS +// Supported forms : (5 forms) +// +// * VFNMADD132SS xmm, xmm, xmm [FMA3] +// * VFNMADD132SS m32, xmm, xmm [FMA3] +// * VFNMADD132SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD132SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD132SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMADD132SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD132SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD132SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD132SS takes 3 or 4 operands") + } + // VFNMADD132SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x9d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD132SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x9d) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFNMADD132SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD132SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD132SS") + } + return p +} + +// VFNMADD213PD performs "Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADD213PD +// Supported forms : (11 forms) +// +// * VFNMADD213PD xmm, xmm, xmm [FMA3] +// * VFNMADD213PD m128, xmm, xmm [FMA3] +// * VFNMADD213PD ymm, ymm, ymm [FMA3] +// * VFNMADD213PD m256, ymm, ymm [FMA3] +// * VFNMADD213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD213PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMADD213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD213PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD213PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD213PD takes 3 or 4 operands") + } + // VFNMADD213PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD213PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD213PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMADD213PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xac) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD213PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMADD213PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMADD213PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD213PD") + } + return p +} + +// VFNMADD213PS performs "Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values". 
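VFNMADD213PD is variadic because only the EVEX embedded-rounding form takes a fourth operand: a leading {er} rounding-control value shifts the three register operands one position to the right. A hedged sketch; the concrete type accepted by isER depends on this copy of the library, so er is left as a hypothetical placeholder:

package examples

import "github.com/cloudwego/iasm/x86_64"

// Three-operand form: ZMM2 = -(ZMM1 * ZMM2) + ZMM0 (213 pairing, reversed
// operand order). Anything but zero or one trailing operand panics before
// form matching runs.
func emitFnmadd213PD(p *x86_64.Program, er interface{}) {
	p.VFNMADD213PD(x86_64.ZMM0, x86_64.ZMM1, x86_64.ZMM2)
	// Embedded-rounding form: same register roles, shifted one slot right;
	// er stands in for whatever rounding-control value isER accepts.
	p.VFNMADD213PD(er, x86_64.ZMM0, x86_64.ZMM1, x86_64.ZMM2)
}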
+// +// Mnemonic : VFNMADD213PS +// Supported forms : (11 forms) +// +// * VFNMADD213PS xmm, xmm, xmm [FMA3] +// * VFNMADD213PS m128, xmm, xmm [FMA3] +// * VFNMADD213PS ymm, ymm, ymm [FMA3] +// * VFNMADD213PS m256, ymm, ymm [FMA3] +// * VFNMADD213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD213PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMADD213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD213PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD213PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD213PS takes 3 or 4 operands") + } + // VFNMADD213PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD213PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD213PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMADD213PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xac) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD213PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMADD213PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMADD213PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD213PS") + } + return p +} + +// VFNMADD213SD performs "Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADD213SD +// Supported forms : (5 forms) +// +// * VFNMADD213SD xmm, xmm, xmm [FMA3] +// * VFNMADD213SD m64, xmm, xmm [FMA3] +// * VFNMADD213SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD213SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD213SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMADD213SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD213SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD213SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD213SD takes 3 or 4 operands") + } + // VFNMADD213SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xad) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xad) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD213SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xad) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFNMADD213SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xad) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD213SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xad) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD213SD") + } + return p +} + +// VFNMADD213SS performs "Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADD213SS +// Supported forms : (5 forms) +// +// * VFNMADD213SS xmm, xmm, xmm [FMA3] +// * VFNMADD213SS m32, xmm, xmm [FMA3] +// * VFNMADD213SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD213SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD213SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMADD213SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD213SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD213SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD213SS takes 3 or 4 operands") + } + // VFNMADD213SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xad) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xad) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD213SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xad) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFNMADD213SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xad) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD213SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xad) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD213SS") + } + return p +} + +// VFNMADD231PD performs "Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values". 
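Throughout these encoders the final argument to m.mrsd is the displacement scale: the VEX-encoded forms pass 1 (byte-granular displacement), while the EVEX forms pass the operand size in bytes, matching the AVX-512 compressed disp8*N rule. The factors visible in the functions above, collected as a reference table (not library API):

package examples

// Disp8MrsdScale maps each memory-operand form in this section to the
// scale handed to m.mrsd; EVEX stores disp8 in units of this size.
var Disp8MrsdScale = map[string]int{
	"VEX (any memory form)": 1,
	"EVEX m32 scalar":       4,
	"EVEX m64 scalar":       8,
	"EVEX 128-bit form":     16,
	"EVEX 256-bit form":     32,
	"EVEX 512-bit form":     64,
}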
+// +// Mnemonic : VFNMADD231PD +// Supported forms : (11 forms) +// +// * VFNMADD231PD xmm, xmm, xmm [FMA3] +// * VFNMADD231PD m128, xmm, xmm [FMA3] +// * VFNMADD231PD ymm, ymm, ymm [FMA3] +// * VFNMADD231PD m256, ymm, ymm [FMA3] +// * VFNMADD231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD231PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMADD231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD231PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD231PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD231PD takes 3 or 4 operands") + } + // VFNMADD231PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD231PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD231PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMADD231PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD231PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMADD231PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMADD231PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD231PD") + } + return p +} + +// VFNMADD231PS performs "Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADD231PS +// Supported forms : (11 forms) +// +// * VFNMADD231PS xmm, xmm, xmm [FMA3] +// * VFNMADD231PS m128, xmm, xmm [FMA3] +// * VFNMADD231PS ymm, ymm, ymm [FMA3] +// * VFNMADD231PS m256, ymm, ymm [FMA3] +// * VFNMADD231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD231PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMADD231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD231PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD231PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD231PS takes 3 or 4 operands") + } + // VFNMADD231PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD231PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD231PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMADD231PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD231PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMADD231PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMADD231PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD231PS") + } + return p +} + +// VFNMADD231SD performs "Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values". 
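Of the three FMA3 pairings, 231 multiplies the two sources and accumulates into the destination, which makes it the natural spelling for acc -= a*b reduction loops; a sketch under the same operand-order assumption as above:

package examples

import "github.com/cloudwego/iasm/x86_64"

// XMM2 = -(XMM1 * XMM0) + XMM2: the accumulator stays in the destination
// register across loop iterations, so no extra register moves are needed.
func emitNegMulAccumulate(p *x86_64.Program) {
	p.VFNMADD231PS(x86_64.XMM0, x86_64.XMM1, x86_64.XMM2)
}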
+// +// Mnemonic : VFNMADD231SD +// Supported forms : (5 forms) +// +// * VFNMADD231SD xmm, xmm, xmm [FMA3] +// * VFNMADD231SD m64, xmm, xmm [FMA3] +// * VFNMADD231SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD231SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD231SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMADD231SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD231SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD231SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD231SD takes 3 or 4 operands") + } + // VFNMADD231SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbd) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD231SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xbd) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFNMADD231SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD231SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD231SD") + } + return p +} + +// VFNMADD231SS performs "Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADD231SS +// Supported forms : (5 forms) +// +// * VFNMADD231SS xmm, xmm, xmm [FMA3] +// * VFNMADD231SS m32, xmm, xmm [FMA3] +// * VFNMADD231SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD231SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD231SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMADD231SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD231SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD231SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD231SS takes 3 or 4 operands") + } + // VFNMADD231SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbd) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD231SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xbd) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFNMADD231SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD231SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD231SS") + } + return p +} + +// VFNMADDPD performs "Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADDPD +// Supported forms : (6 forms) +// +// * VFNMADDPD xmm, xmm, xmm, xmm [FMA4] +// * VFNMADDPD m128, xmm, xmm, xmm [FMA4] +// * VFNMADDPD xmm, m128, xmm, xmm [FMA4] +// * VFNMADDPD ymm, ymm, ymm, ymm [FMA4] +// * VFNMADDPD m256, ymm, ymm, ymm [FMA4] +// * VFNMADDPD ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFNMADDPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFNMADDPD", 4, Operands { v0, v1, v2, v3 }) + // VFNMADDPD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x79) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x79) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMADDPD m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x79) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMADDPD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x79) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMADDPD ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x79) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x79) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMADDPD m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x79) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMADDPD ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x79) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADDPD") + } + return p +} + +// VFNMADDPS performs "Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADDPS +// Supported forms : (6 forms) +// +// * VFNMADDPS xmm, xmm, xmm, xmm [FMA4] +// * VFNMADDPS m128, xmm, xmm, xmm [FMA4] +// * VFNMADDPS xmm, m128, xmm, xmm [FMA4] +// * VFNMADDPS ymm, ymm, ymm, ymm [FMA4] +// * VFNMADDPS m256, ymm, ymm, ymm [FMA4] +// * VFNMADDPS ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFNMADDPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFNMADDPS", 4, Operands { v0, v1, v2, v3 }) + // VFNMADDPS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x78) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x78) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMADDPS m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x78) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMADDPS xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x78) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMADDPS ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x78) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x78) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMADDPS m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x78) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMADDPS ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x78) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADDPS") + } + return p +} + +// VFNMADDSD performs "Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values". 
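+//
+// Because of the FMA4 operand swap described for VFNMADDPD above, the 64-bit
+// memory operand may stand in for either the addend (first position below)
+// or the second multiplicand (second position); the FMA3 scalar forms
+// earlier in this file admit memory only in the first operand position.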
+// +// Mnemonic : VFNMADDSD +// Supported forms : (3 forms) +// +// * VFNMADDSD xmm, xmm, xmm, xmm [FMA4] +// * VFNMADDSD m64, xmm, xmm, xmm [FMA4] +// * VFNMADDSD xmm, m64, xmm, xmm [FMA4] +// +func (self *Program) VFNMADDSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFNMADDSD", 4, Operands { v0, v1, v2, v3 }) + // VFNMADDSD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMADDSD m64, xmm, xmm, xmm + if isM64(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x7b) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMADDSD xmm, m64, xmm, xmm + if isXMM(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x7b) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADDSD") + } + return p +} + +// VFNMADDSS performs "Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADDSS +// Supported forms : (3 forms) +// +// * VFNMADDSS xmm, xmm, xmm, xmm [FMA4] +// * VFNMADDSS m32, xmm, xmm, xmm [FMA4] +// * VFNMADDSS xmm, m32, xmm, xmm [FMA4] +// +func (self *Program) VFNMADDSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFNMADDSS", 4, Operands { v0, v1, v2, v3 }) + // VFNMADDSS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMADDSS m32, xmm, xmm, xmm + if isM32(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x7a) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMADDSS xmm, m32, xmm, xmm + if isXMM(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x7a) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADDSS") + } + return p +} + +// VFNMSUB132PD performs "Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values". 
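+//
+// The 132/213/231 suffixes name which Intel-order operands are multiplied
+// and which is accumulated; for the FNMSUB family (Intel operand order,
+// i.e. destination first):
+//
+//     x132:  dest = -(dest * op3) - op2
+//     x213:  dest = -(op2 * dest) - op3
+//     x231:  dest = -(op2 * op3) - dest
+//
+// Operands to these Go methods are listed in the reverse (destination-last)
+// order shown in the forms below.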
+// +// Mnemonic : VFNMSUB132PD +// Supported forms : (11 forms) +// +// * VFNMSUB132PD xmm, xmm, xmm [FMA3] +// * VFNMSUB132PD m128, xmm, xmm [FMA3] +// * VFNMSUB132PD ymm, ymm, ymm [FMA3] +// * VFNMSUB132PD m256, ymm, ymm [FMA3] +// * VFNMSUB132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB132PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMSUB132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB132PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB132PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB132PD takes 3 or 4 operands") + } + // VFNMSUB132PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB132PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB132PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMSUB132PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB132PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMSUB132PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMSUB132PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB132PD") + } + return p +} + +// VFNMSUB132PS performs "Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values". 
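+//
+// The EVEX-encoded forms add the usual AVX-512 options: {k} applies a
+// write-mask register (with {z} selecting zeroing- instead of
+// merging-masking), m512/m32bcst means the memory operand may be either a
+// full 512-bit vector or a single 32-bit element broadcast to all lanes,
+// and {er} requests an embedded rounding mode, which AVX-512 can only
+// encode for full-width (512-bit) or scalar operations.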
+// +// Mnemonic : VFNMSUB132PS +// Supported forms : (11 forms) +// +// * VFNMSUB132PS xmm, xmm, xmm [FMA3] +// * VFNMSUB132PS m128, xmm, xmm [FMA3] +// * VFNMSUB132PS ymm, ymm, ymm [FMA3] +// * VFNMSUB132PS m256, ymm, ymm [FMA3] +// * VFNMSUB132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB132PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMSUB132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB132PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB132PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB132PS takes 3 or 4 operands") + } + // VFNMSUB132PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB132PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB132PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMSUB132PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB132PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMSUB132PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMSUB132PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB132PS") + } + return p +} + +// VFNMSUB132SD performs "Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMSUB132SD +// Supported forms : (5 forms) +// +// * VFNMSUB132SD xmm, xmm, xmm [FMA3] +// * VFNMSUB132SD m64, xmm, xmm [FMA3] +// * VFNMSUB132SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB132SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB132SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMSUB132SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB132SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB132SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB132SD takes 3 or 4 operands") + } + // VFNMSUB132SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB132SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x9f) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFNMSUB132SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB132SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB132SD") + } + return p +} + +// VFNMSUB132SS performs "Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMSUB132SS +// Supported forms : (5 forms) +// +// * VFNMSUB132SS xmm, xmm, xmm [FMA3] +// * VFNMSUB132SS m32, xmm, xmm [FMA3] +// * VFNMSUB132SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB132SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB132SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMSUB132SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB132SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB132SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB132SS takes 3 or 4 operands") + } + // VFNMSUB132SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB132SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x9f) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFNMSUB132SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB132SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB132SS") + } + return p +} + +// VFNMSUB213PD performs "Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMSUB213PD +// Supported forms : (11 forms) +// +// * VFNMSUB213PD xmm, xmm, xmm [FMA3] +// * VFNMSUB213PD m128, xmm, xmm [FMA3] +// * VFNMSUB213PD ymm, ymm, ymm [FMA3] +// * VFNMSUB213PD m256, ymm, ymm [FMA3] +// * VFNMSUB213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB213PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMSUB213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB213PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB213PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB213PD takes 3 or 4 operands") + } + // VFNMSUB213PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB213PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB213PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMSUB213PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xae) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB213PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMSUB213PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMSUB213PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB213PD") + } + return p +} + +// VFNMSUB213PS performs "Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMSUB213PS +// Supported forms : (11 forms) +// +// * VFNMSUB213PS xmm, xmm, xmm [FMA3] +// * VFNMSUB213PS m128, xmm, xmm [FMA3] +// * VFNMSUB213PS ymm, ymm, ymm [FMA3] +// * VFNMSUB213PS m256, ymm, ymm [FMA3] +// * VFNMSUB213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB213PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMSUB213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB213PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB213PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB213PS takes 3 or 4 operands") + } + // VFNMSUB213PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB213PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB213PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMSUB213PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xae) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB213PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMSUB213PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMSUB213PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB213PS") + } + return p +} + +// VFNMSUB213SD performs "Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMSUB213SD +// Supported forms : (5 forms) +// +// * VFNMSUB213SD xmm, xmm, xmm [FMA3] +// * VFNMSUB213SD m64, xmm, xmm [FMA3] +// * VFNMSUB213SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB213SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB213SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMSUB213SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB213SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB213SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB213SD takes 3 or 4 operands") + } + // VFNMSUB213SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xaf) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB213SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xaf) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFNMSUB213SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB213SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB213SD") + } + return p +} + +// VFNMSUB213SS performs "Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMSUB213SS +// Supported forms : (5 forms) +// +// * VFNMSUB213SS xmm, xmm, xmm [FMA3] +// * VFNMSUB213SS m32, xmm, xmm [FMA3] +// * VFNMSUB213SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB213SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB213SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMSUB213SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB213SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB213SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB213SS takes 3 or 4 operands") + } + // VFNMSUB213SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xaf) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB213SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xaf) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFNMSUB213SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB213SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB213SS") + } + return p +} + +// VFNMSUB231PD performs "Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values". 
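+//
+// A register-only sketch of the unmasked 512-bit form (assuming the
+// package's ZMM register constants; the zmm{k}{z} slot also accepts a plain
+// register, which behaves as an all-ones write-mask):
+//
+//     p.VFNMSUB231PD(ZMM2, ZMM1, ZMM0)    // zmm0 = -(zmm1 * zmm2) - zmm0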
+// +// Mnemonic : VFNMSUB231PD +// Supported forms : (11 forms) +// +// * VFNMSUB231PD xmm, xmm, xmm [FMA3] +// * VFNMSUB231PD m128, xmm, xmm [FMA3] +// * VFNMSUB231PD ymm, ymm, ymm [FMA3] +// * VFNMSUB231PD m256, ymm, ymm [FMA3] +// * VFNMSUB231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB231PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMSUB231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB231PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB231PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB231PD takes 3 or 4 operands") + } + // VFNMSUB231PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB231PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB231PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMSUB231PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB231PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMSUB231PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMSUB231PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB231PD") + } + return p +} + +// VFNMSUB231PS performs "Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMSUB231PS +// Supported forms : (11 forms) +// +// * VFNMSUB231PS xmm, xmm, xmm [FMA3] +// * VFNMSUB231PS m128, xmm, xmm [FMA3] +// * VFNMSUB231PS ymm, ymm, ymm [FMA3] +// * VFNMSUB231PS m256, ymm, ymm [FMA3] +// * VFNMSUB231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB231PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMSUB231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB231PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB231PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB231PS takes 3 or 4 operands") + } + // VFNMSUB231PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB231PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB231PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMSUB231PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB231PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMSUB231PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMSUB231PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB231PS") + } + return p +} + +// VFNMSUB231SD performs "Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values". 
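+//
+// As with every encoder in this file: self.require records the ISA
+// extensions the matched form depends on, p.domain tags the instruction's
+// domain for later bookkeeping, and each p.add call registers one candidate
+// encoding for the matched form; if no form matches, the method panics.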
+// +// Mnemonic : VFNMSUB231SD +// Supported forms : (5 forms) +// +// * VFNMSUB231SD xmm, xmm, xmm [FMA3] +// * VFNMSUB231SD m64, xmm, xmm [FMA3] +// * VFNMSUB231SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB231SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB231SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMSUB231SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB231SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB231SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB231SD takes 3 or 4 operands") + } + // VFNMSUB231SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xbf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbf) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB231SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xbf) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFNMSUB231SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbf) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB231SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB231SD") + } + return p +} + +// VFNMSUB231SS performs "Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMSUB231SS +// Supported forms : (5 forms) +// +// * VFNMSUB231SS xmm, xmm, xmm [FMA3] +// * VFNMSUB231SS m32, xmm, xmm [FMA3] +// * VFNMSUB231SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB231SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB231SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMSUB231SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB231SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB231SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB231SS takes 3 or 4 operands") + } + // VFNMSUB231SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xbf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbf) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB231SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xbf) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFNMSUB231SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbf) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB231SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB231SS") + } + return p +} + +// VFNMSUBPD performs "Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values". 
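+//
+// A usage sketch, assuming exported XMM register constants. FMA4 encodes all four
+// operands explicitly (the destination is written last here); note that the
+// register-only case below registers two equivalent encodings via p.add, and the
+// encoder may use either:
+//
+//     p.VFNMSUBPD(XMM3, XMM2, XMM1, XMM0)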
+// +// Mnemonic : VFNMSUBPD +// Supported forms : (6 forms) +// +// * VFNMSUBPD xmm, xmm, xmm, xmm [FMA4] +// * VFNMSUBPD m128, xmm, xmm, xmm [FMA4] +// * VFNMSUBPD xmm, m128, xmm, xmm [FMA4] +// * VFNMSUBPD ymm, ymm, ymm, ymm [FMA4] +// * VFNMSUBPD m256, ymm, ymm, ymm [FMA4] +// * VFNMSUBPD ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFNMSUBPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFNMSUBPD", 4, Operands { v0, v1, v2, v3 }) + // VFNMSUBPD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMSUBPD m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x7d) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMSUBPD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x7d) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMSUBPD ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMSUBPD m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x7d) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMSUBPD ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x7d) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUBPD") + } + return p +} + +// VFNMSUBPS performs "Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMSUBPS +// Supported forms : (6 forms) +// +// * VFNMSUBPS xmm, xmm, xmm, xmm [FMA4] +// * VFNMSUBPS m128, xmm, xmm, xmm [FMA4] +// * VFNMSUBPS xmm, m128, xmm, xmm [FMA4] +// * VFNMSUBPS ymm, ymm, ymm, ymm [FMA4] +// * VFNMSUBPS m256, ymm, ymm, ymm [FMA4] +// * VFNMSUBPS ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFNMSUBPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFNMSUBPS", 4, Operands { v0, v1, v2, v3 }) + // VFNMSUBPS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMSUBPS m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x7c) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMSUBPS xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x7c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMSUBPS ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMSUBPS m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x7c) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMSUBPS ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x7c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUBPS") + } + return p +} + +// VFNMSUBSD performs "Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMSUBSD +// Supported forms : (3 forms) +// +// * VFNMSUBSD xmm, xmm, xmm, xmm [FMA4] +// * VFNMSUBSD m64, xmm, xmm, xmm [FMA4] +// * VFNMSUBSD xmm, m64, xmm, xmm [FMA4] +// +func (self *Program) VFNMSUBSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFNMSUBSD", 4, Operands { v0, v1, v2, v3 }) + // VFNMSUBSD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMSUBSD m64, xmm, xmm, xmm + if isM64(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x7f) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMSUBSD xmm, m64, xmm, xmm + if isXMM(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x7f) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUBSD") + } + return p +} + +// VFNMSUBSS performs "Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMSUBSS +// Supported forms : (3 forms) +// +// * VFNMSUBSS xmm, xmm, xmm, xmm [FMA4] +// * VFNMSUBSS m32, xmm, xmm, xmm [FMA4] +// * VFNMSUBSS xmm, m32, xmm, xmm [FMA4] +// +func (self *Program) VFNMSUBSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFNMSUBSS", 4, Operands { v0, v1, v2, v3 }) + // VFNMSUBSS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMSUBSS m32, xmm, xmm, xmm + if isM32(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x7e) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMSUBSS xmm, m32, xmm, xmm + if isXMM(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x7e) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUBSS") + } + return p +} + +// VFPCLASSPD performs "Test Class of Packed Double-Precision Floating-Point Values". 
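+//
+// A usage sketch, assuming exported ZMM and mask-register constants (ZMM1, K1).
+// The leading imm8 is a bitmask of floating-point classes to test for (QNaN, SNaN,
+// +/-0, +/-Inf, denormal, negative); matching lanes set their bit in the mask
+// destination:
+//
+//     p.VFPCLASSPD(0x81, ZMM1, K1)    // k1[i] = 1 if zmm1 lane i is QNaN or SNaN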
+// +// Mnemonic : VFPCLASSPD +// Supported forms : (6 forms) +// +// * VFPCLASSPD imm8, m512/m64bcst, k{k} [AVX512DQ] +// * VFPCLASSPD imm8, zmm, k{k} [AVX512DQ] +// * VFPCLASSPD imm8, m128/m64bcst, k{k} [AVX512DQ,AVX512VL] +// * VFPCLASSPD imm8, m256/m64bcst, k{k} [AVX512DQ,AVX512VL] +// * VFPCLASSPD imm8, xmm, k{k} [AVX512DQ,AVX512VL] +// * VFPCLASSPD imm8, ymm, k{k} [AVX512DQ,AVX512VL] +// +func (self *Program) VFPCLASSPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VFPCLASSPD", 3, Operands { v0, v1, v2 }) + // VFPCLASSPD imm8, m512/m64bcst, k{k} + if isImm8(v0) && isM512M64bcst(v1) && isKk(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPD imm8, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit(kcode(v[2]) | 0x48) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPD imm8, m128/m64bcst, k{k} + if isImm8(v0) && isM128M64bcst(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPD imm8, m256/m64bcst, k{k} + if isImm8(v0) && isM256M64bcst(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPD imm8, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit(kcode(v[2]) | 0x08) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPD imm8, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit(kcode(v[2]) | 0x28) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFPCLASSPD") + } + return p +} + +// VFPCLASSPS performs "Test Class of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFPCLASSPS +// Supported forms : (6 forms) +// +// * VFPCLASSPS imm8, m512/m32bcst, k{k} [AVX512DQ] +// * VFPCLASSPS imm8, zmm, k{k} [AVX512DQ] +// * VFPCLASSPS imm8, m128/m32bcst, k{k} [AVX512DQ,AVX512VL] +// * VFPCLASSPS imm8, m256/m32bcst, k{k} [AVX512DQ,AVX512VL] +// * VFPCLASSPS imm8, xmm, k{k} [AVX512DQ,AVX512VL] +// * VFPCLASSPS imm8, ymm, k{k} [AVX512DQ,AVX512VL] +// +func (self *Program) VFPCLASSPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VFPCLASSPS", 3, Operands { v0, v1, v2 }) + // VFPCLASSPS imm8, m512/m32bcst, k{k} + if isImm8(v0) && isM512M32bcst(v1) && isKk(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPS imm8, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit(kcode(v[2]) | 0x48) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPS imm8, m128/m32bcst, k{k} + if isImm8(v0) && isM128M32bcst(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPS imm8, m256/m32bcst, k{k} + if isImm8(v0) && isM256M32bcst(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPS imm8, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit(kcode(v[2]) | 0x08) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPS imm8, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit(kcode(v[2]) | 0x28) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFPCLASSPS") + } + return p +} + +// VFPCLASSSD performs "Test Class of Scalar Double-Precision Floating-Point Value". 
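+//
+// A usage sketch of the m64 form. Ptr is a stand-in for however this package
+// constructs a 64-bit memory operand from a base register, so treat the operand
+// spelling as hypothetical; K1 and RAX are assumed to be exported constants:
+//
+//     p.VFPCLASSSD(0x18, Ptr(RAX, 0), K1)    // k1 = 1 if the double at [rax] is +/-Inf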
+// +// Mnemonic : VFPCLASSSD +// Supported forms : (2 forms) +// +// * VFPCLASSSD imm8, xmm, k{k} [AVX512DQ] +// * VFPCLASSSD imm8, m64, k{k} [AVX512DQ] +// +func (self *Program) VFPCLASSSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VFPCLASSSD", 3, Operands { v0, v1, v2 }) + // VFPCLASSSD imm8, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit(kcode(v[2]) | 0x08) + m.emit(0x67) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSSD imm8, m64, k{k} + if isImm8(v0) && isM64(v1) && isKk(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, 0) + m.emit(0x67) + m.mrsd(lcode(v[2]), addr(v[1]), 8) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFPCLASSSD") + } + return p +} + +// VFPCLASSSS performs "Test Class of Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : VFPCLASSSS +// Supported forms : (2 forms) +// +// * VFPCLASSSS imm8, xmm, k{k} [AVX512DQ] +// * VFPCLASSSS imm8, m32, k{k} [AVX512DQ] +// +func (self *Program) VFPCLASSSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VFPCLASSSS", 3, Operands { v0, v1, v2 }) + // VFPCLASSSS imm8, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit(kcode(v[2]) | 0x08) + m.emit(0x67) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSSS imm8, m32, k{k} + if isImm8(v0) && isM32(v1) && isKk(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, 0) + m.emit(0x67) + m.mrsd(lcode(v[2]), addr(v[1]), 4) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFPCLASSSS") + } + return p +} + +// VFRCZPD performs "Extract Fraction Packed Double-Precision Floating-Point". 
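+//
+// A usage sketch, assuming exported XMM constants. VFRCZPD is an AMD XOP
+// instruction, which the self.require(ISA_XOP) calls below record as a feature
+// requirement for the program:
+//
+//     p.VFRCZPD(XMM1, XMM0)    // xmm0 = fractional part of each double in xmm1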
+// +// Mnemonic : VFRCZPD +// Supported forms : (4 forms) +// +// * VFRCZPD xmm, xmm [XOP] +// * VFRCZPD m128, xmm [XOP] +// * VFRCZPD ymm, ymm [XOP] +// * VFRCZPD m256, ymm [XOP] +// +func (self *Program) VFRCZPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VFRCZPD", 2, Operands { v0, v1 }) + // VFRCZPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0x81) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VFRCZPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0x81) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VFRCZPD ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7c) + m.emit(0x81) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VFRCZPD m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x04, hcode(v[1]), addr(v[0]), 0) + m.emit(0x81) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VFRCZPD") + } + return p +} + +// VFRCZPS performs "Extract Fraction Packed Single-Precision Floating-Point". +// +// Mnemonic : VFRCZPS +// Supported forms : (4 forms) +// +// * VFRCZPS xmm, xmm [XOP] +// * VFRCZPS m128, xmm [XOP] +// * VFRCZPS ymm, ymm [XOP] +// * VFRCZPS m256, ymm [XOP] +// +func (self *Program) VFRCZPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VFRCZPS", 2, Operands { v0, v1 }) + // VFRCZPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0x80) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VFRCZPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0x80) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VFRCZPS ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7c) + m.emit(0x80) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VFRCZPS m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x04, hcode(v[1]), addr(v[0]), 0) + m.emit(0x80) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VFRCZPS") + } + return p +} + +// VFRCZSD performs "Extract Fraction Scalar Double-Precision Floating-Point". 
+// +// Mnemonic : VFRCZSD +// Supported forms : (2 forms) +// +// * VFRCZSD xmm, xmm [XOP] +// * VFRCZSD m64, xmm [XOP] +// +func (self *Program) VFRCZSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VFRCZSD", 2, Operands { v0, v1 }) + // VFRCZSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0x83) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VFRCZSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0x83) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VFRCZSD") + } + return p +} + +// VFRCZSS performs "Extract Fraction Scalar Single-Precision Floating Point". +// +// Mnemonic : VFRCZSS +// Supported forms : (2 forms) +// +// * VFRCZSS xmm, xmm [XOP] +// * VFRCZSS m32, xmm [XOP] +// +func (self *Program) VFRCZSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VFRCZSS", 2, Operands { v0, v1 }) + // VFRCZSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0x82) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VFRCZSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0x82) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VFRCZSS") + } + return p +} + +// VGATHERDPD performs "Gather Packed Double-Precision Floating-Point Values Using Signed Doubleword Indices". 
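+//
+// A usage sketch of the AVX2 form, assuming exported XMM constants. The first
+// operand is the write mask (cleared to zero as elements complete) and the last is
+// the destination; the vm32x operand is written as a placeholder because the exact
+// vector-memory constructor is not shown in this file:
+//
+//     p.VGATHERDPD(XMM3, <vm32x operand>, XMM0)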
+// +// Mnemonic : VGATHERDPD +// Supported forms : (5 forms) +// +// * VGATHERDPD xmm, vm32x, xmm [AVX2] +// * VGATHERDPD ymm, vm32x, ymm [AVX2] +// * VGATHERDPD vm32y, zmm{k} [AVX512F] +// * VGATHERDPD vm32x, xmm{k} [AVX512F,AVX512VL] +// * VGATHERDPD vm32x, ymm{k} [AVX512F,AVX512VL] +// +func (self *Program) VGATHERDPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGATHERDPD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VGATHERDPD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VGATHERDPD takes 2 or 3 operands") + } + // VGATHERDPD xmm, vm32x, xmm + if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x92) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VGATHERDPD ymm, vm32x, ymm + if len(vv) == 1 && isYMM(v0) && isVMX(v1) && isYMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x92) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VGATHERDPD vm32y, zmm{k} + if len(vv) == 0 && isEVEXVMY(v0) && isZMMk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x92) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VGATHERDPD vm32x, xmm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x92) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VGATHERDPD vm32x, ymm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isYMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x92) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERDPD") + } + return p +} + +// VGATHERDPS performs "Gather Packed Single-Precision Floating-Point Values Using Signed Doubleword Indices". 
+// +// Mnemonic : VGATHERDPS +// Supported forms : (5 forms) +// +// * VGATHERDPS xmm, vm32x, xmm [AVX2] +// * VGATHERDPS ymm, vm32y, ymm [AVX2] +// * VGATHERDPS vm32z, zmm{k} [AVX512F] +// * VGATHERDPS vm32x, xmm{k} [AVX512F,AVX512VL] +// * VGATHERDPS vm32y, ymm{k} [AVX512F,AVX512VL] +// +func (self *Program) VGATHERDPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGATHERDPS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VGATHERDPS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VGATHERDPS takes 2 or 3 operands") + } + // VGATHERDPS xmm, vm32x, xmm + if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x92) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VGATHERDPS ymm, vm32y, ymm + if len(vv) == 1 && isYMM(v0) && isVMY(v1) && isYMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x92) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VGATHERDPS vm32z, zmm{k} + if len(vv) == 0 && isVMZ(v0) && isZMMk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x92) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VGATHERDPS vm32x, xmm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x92) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VGATHERDPS vm32y, ymm{k} + if len(vv) == 0 && isEVEXVMY(v0) && isYMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x92) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERDPS") + } + return p +} + +// VGATHERPF0DPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Doubleword Indices Using T0 Hint". +// +// Mnemonic : VGATHERPF0DPD +// Supported forms : (1 form) +// +// * VGATHERPF0DPD vm32y{k} [AVX512PF] +// +func (self *Program) VGATHERPF0DPD(v0 interface{}) *Instruction { + p := self.alloc("VGATHERPF0DPD", 1, Operands { v0 }) + // VGATHERPF0DPD vm32y{k} + if isVMYk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc6) + m.mrsd(1, addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERPF0DPD") + } + return p +} + +// VGATHERPF0DPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Doubleword Indices Using T0 Hint". 
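+//
+// A usage sketch. The single vm32z operand must carry a non-zero {k} mask, and only
+// the lanes selected by the mask are prefetched with a T0 hint; the operand below
+// is a placeholder for however this package spells a masked vector-memory operand:
+//
+//     p.VGATHERPF0DPS(<vm32z{k} operand>)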
+// +// Mnemonic : VGATHERPF0DPS +// Supported forms : (1 form) +// +// * VGATHERPF0DPS vm32z{k} [AVX512PF] +// +func (self *Program) VGATHERPF0DPS(v0 interface{}) *Instruction { + p := self.alloc("VGATHERPF0DPS", 1, Operands { v0 }) + // VGATHERPF0DPS vm32z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc6) + m.mrsd(1, addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERPF0DPS") + } + return p +} + +// VGATHERPF0QPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Quadword Indices Using T0 Hint". +// +// Mnemonic : VGATHERPF0QPD +// Supported forms : (1 form) +// +// * VGATHERPF0QPD vm64z{k} [AVX512PF] +// +func (self *Program) VGATHERPF0QPD(v0 interface{}) *Instruction { + p := self.alloc("VGATHERPF0QPD", 1, Operands { v0 }) + // VGATHERPF0QPD vm64z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc7) + m.mrsd(1, addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERPF0QPD") + } + return p +} + +// VGATHERPF0QPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Quadword Indices Using T0 Hint". +// +// Mnemonic : VGATHERPF0QPS +// Supported forms : (1 form) +// +// * VGATHERPF0QPS vm64z{k} [AVX512PF] +// +func (self *Program) VGATHERPF0QPS(v0 interface{}) *Instruction { + p := self.alloc("VGATHERPF0QPS", 1, Operands { v0 }) + // VGATHERPF0QPS vm64z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc7) + m.mrsd(1, addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERPF0QPS") + } + return p +} + +// VGATHERPF1DPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Doubleword Indices Using T1 Hint". +// +// Mnemonic : VGATHERPF1DPD +// Supported forms : (1 form) +// +// * VGATHERPF1DPD vm32y{k} [AVX512PF] +// +func (self *Program) VGATHERPF1DPD(v0 interface{}) *Instruction { + p := self.alloc("VGATHERPF1DPD", 1, Operands { v0 }) + // VGATHERPF1DPD vm32y{k} + if isVMYk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc6) + m.mrsd(2, addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERPF1DPD") + } + return p +} + +// VGATHERPF1DPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Doubleword Indices Using T1 Hint". 
+// +// Mnemonic : VGATHERPF1DPS +// Supported forms : (1 form) +// +// * VGATHERPF1DPS vm32z{k} [AVX512PF] +// +func (self *Program) VGATHERPF1DPS(v0 interface{}) *Instruction { + p := self.alloc("VGATHERPF1DPS", 1, Operands { v0 }) + // VGATHERPF1DPS vm32z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc6) + m.mrsd(2, addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERPF1DPS") + } + return p +} + +// VGATHERPF1QPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Quadword Indices Using T1 Hint". +// +// Mnemonic : VGATHERPF1QPD +// Supported forms : (1 form) +// +// * VGATHERPF1QPD vm64z{k} [AVX512PF] +// +func (self *Program) VGATHERPF1QPD(v0 interface{}) *Instruction { + p := self.alloc("VGATHERPF1QPD", 1, Operands { v0 }) + // VGATHERPF1QPD vm64z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc7) + m.mrsd(2, addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERPF1QPD") + } + return p +} + +// VGATHERPF1QPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Quadword Indices Using T1 Hint". +// +// Mnemonic : VGATHERPF1QPS +// Supported forms : (1 form) +// +// * VGATHERPF1QPS vm64z{k} [AVX512PF] +// +func (self *Program) VGATHERPF1QPS(v0 interface{}) *Instruction { + p := self.alloc("VGATHERPF1QPS", 1, Operands { v0 }) + // VGATHERPF1QPS vm64z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc7) + m.mrsd(2, addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERPF1QPS") + } + return p +} + +// VGATHERQPD performs "Gather Packed Double-Precision Floating-Point Values Using Signed Quadword Indices". 
+// +// Mnemonic : VGATHERQPD +// Supported forms : (5 forms) +// +// * VGATHERQPD xmm, vm64x, xmm [AVX2] +// * VGATHERQPD ymm, vm64y, ymm [AVX2] +// * VGATHERQPD vm64z, zmm{k} [AVX512F] +// * VGATHERQPD vm64x, xmm{k} [AVX512F,AVX512VL] +// * VGATHERQPD vm64y, ymm{k} [AVX512F,AVX512VL] +// +func (self *Program) VGATHERQPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGATHERQPD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VGATHERQPD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VGATHERQPD takes 2 or 3 operands") + } + // VGATHERQPD xmm, vm64x, xmm + if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x93) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VGATHERQPD ymm, vm64y, ymm + if len(vv) == 1 && isYMM(v0) && isVMY(v1) && isYMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x93) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VGATHERQPD vm64z, zmm{k} + if len(vv) == 0 && isVMZ(v0) && isZMMk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x93) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VGATHERQPD vm64x, xmm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x93) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VGATHERQPD vm64y, ymm{k} + if len(vv) == 0 && isEVEXVMY(v0) && isYMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x93) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERQPD") + } + return p +} + +// VGATHERQPS performs "Gather Packed Single-Precision Floating-Point Values Using Signed Quadword Indices". 
+// +// Mnemonic : VGATHERQPS +// Supported forms : (5 forms) +// +// * VGATHERQPS xmm, vm64x, xmm [AVX2] +// * VGATHERQPS xmm, vm64y, xmm [AVX2] +// * VGATHERQPS vm64z, ymm{k} [AVX512F] +// * VGATHERQPS vm64x, xmm{k} [AVX512F,AVX512VL] +// * VGATHERQPS vm64y, xmm{k} [AVX512F,AVX512VL] +// +func (self *Program) VGATHERQPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGATHERQPS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VGATHERQPS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VGATHERQPS takes 2 or 3 operands") + } + // VGATHERQPS xmm, vm64x, xmm + if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x93) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VGATHERQPS xmm, vm64y, xmm + if len(vv) == 1 && isXMM(v0) && isVMY(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x93) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VGATHERQPS vm64z, ymm{k} + if len(vv) == 0 && isVMZ(v0) && isYMMk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x93) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VGATHERQPS vm64x, xmm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x93) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VGATHERQPS vm64y, xmm{k} + if len(vv) == 0 && isEVEXVMY(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x93) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERQPS") + } + return p +} + +// VGETEXPPD performs "Extract Exponents of Packed Double-Precision Floating-Point Values as Double-Precision Floating-Point Values". 
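+//
+// A usage sketch, assuming exported ZMM constants. The plain form takes two
+// operands; passing a {sae} operand first selects the suppress-all-exceptions
+// encoding, which is why this method accepts 2 or 3 operands:
+//
+//     p.VGETEXPPD(ZMM1, ZMM0)    // zmm0 = unbiased exponents of zmm1, as doubles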
+// +// Mnemonic : VGETEXPPD +// Supported forms : (7 forms) +// +// * VGETEXPPD m512/m64bcst, zmm{k}{z} [AVX512F] +// * VGETEXPPD {sae}, zmm, zmm{k}{z} [AVX512F] +// * VGETEXPPD zmm, zmm{k}{z} [AVX512F] +// * VGETEXPPD m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VGETEXPPD m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VGETEXPPD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VGETEXPPD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VGETEXPPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGETEXPPD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VGETEXPPD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VGETEXPPD takes 2 or 3 operands") + } + // VGETEXPPD m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VGETEXPPD {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x42) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VGETEXPPD zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VGETEXPPD m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VGETEXPPD m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VGETEXPPD xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VGETEXPPD ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + 
} + if p.len == 0 { + panic("invalid operands for VGETEXPPD") + } + return p +} + +// VGETEXPPS performs "Extract Exponents of Packed Single-Precision Floating-Point Values as Single-Precision Floating-Point Values". +// +// Mnemonic : VGETEXPPS +// Supported forms : (7 forms) +// +// * VGETEXPPS m512/m32bcst, zmm{k}{z} [AVX512F] +// * VGETEXPPS {sae}, zmm, zmm{k}{z} [AVX512F] +// * VGETEXPPS zmm, zmm{k}{z} [AVX512F] +// * VGETEXPPS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VGETEXPPS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VGETEXPPS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VGETEXPPS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VGETEXPPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGETEXPPS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VGETEXPPS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VGETEXPPS takes 2 or 3 operands") + } + // VGETEXPPS m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VGETEXPPS {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x42) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VGETEXPPS zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VGETEXPPS m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VGETEXPPS m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VGETEXPPS xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VGETEXPPS ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VGETEXPPS") + } + return p +} + +// VGETEXPSD performs "Extract Exponent of Scalar Double-Precision Floating-Point Value as Double-Precision Floating-Point Value". +// +// Mnemonic : VGETEXPSD +// Supported forms : (3 forms) +// +// * VGETEXPSD m64, xmm, xmm{k}{z} [AVX512F] +// * VGETEXPSD {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VGETEXPSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VGETEXPSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGETEXPSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VGETEXPSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VGETEXPSD takes 3 or 4 operands") + } + // VGETEXPSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x43) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VGETEXPSD {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x43) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VGETEXPSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x43) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VGETEXPSD") + } + return p +} + +// VGETEXPSS performs "Extract Exponent of Scalar Single-Precision Floating-Point Value as Single-Precision Floating-Point Value". 
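+//
+// A usage sketch of the {sae} form. SAE below is a placeholder for this package's
+// suppress-all-exceptions operand constant (not shown in this file); the XMM
+// register constants are assumed to be exported:
+//
+//     p.VGETEXPSS(SAE, XMM2, XMM1, XMM0)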
+// +// Mnemonic : VGETEXPSS +// Supported forms : (3 forms) +// +// * VGETEXPSS m32, xmm, xmm{k}{z} [AVX512F] +// * VGETEXPSS {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VGETEXPSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VGETEXPSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGETEXPSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VGETEXPSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VGETEXPSS takes 3 or 4 operands") + } + // VGETEXPSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x43) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VGETEXPSS {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x43) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VGETEXPSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x43) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VGETEXPSS") + } + return p +} + +// VGETMANTPD performs "Extract Normalized Mantissas from Packed Double-Precision Floating-Point Values". 
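+//
+// A usage sketch, assuming exported ZMM constants. The leading imm8 picks the
+// normalization interval (bits 1:0) and sign control (bits 3:2) for the extracted
+// mantissas:
+//
+//     p.VGETMANTPD(0x00, ZMM1, ZMM0)    // mantissas of zmm1 normalized to [1, 2)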
+// +// Mnemonic : VGETMANTPD +// Supported forms : (7 forms) +// +// * VGETMANTPD imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VGETMANTPD imm8, {sae}, zmm, zmm{k}{z} [AVX512F] +// * VGETMANTPD imm8, zmm, zmm{k}{z} [AVX512F] +// * VGETMANTPD imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VGETMANTPD imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VGETMANTPD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VGETMANTPD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VGETMANTPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGETMANTPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VGETMANTPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VGETMANTPD takes 3 or 4 operands") + } + // VGETMANTPD imm8, m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPD imm8, {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18) + m.emit(0x26) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPD imm8, zmm, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPD imm8, m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPD imm8, m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPD imm8, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPD imm8, ymm, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && 
isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VGETMANTPD") + } + return p +} + +// VGETMANTPS performs "Extract Normalized Mantissas from Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VGETMANTPS +// Supported forms : (7 forms) +// +// * VGETMANTPS imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VGETMANTPS imm8, {sae}, zmm, zmm{k}{z} [AVX512F] +// * VGETMANTPS imm8, zmm, zmm{k}{z} [AVX512F] +// * VGETMANTPS imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VGETMANTPS imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VGETMANTPS imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VGETMANTPS imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VGETMANTPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGETMANTPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VGETMANTPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VGETMANTPS takes 3 or 4 operands") + } + // VGETMANTPS imm8, m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPS imm8, {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18) + m.emit(0x26) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPS imm8, zmm, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPS imm8, m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPS imm8, m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x26) + m.mrsd(lcode(v[2]), 
addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPS imm8, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPS imm8, ymm, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VGETMANTPS") + } + return p +} + +// VGETMANTSD performs "Extract Normalized Mantissa from Scalar Double-Precision Floating-Point Value". +// +// Mnemonic : VGETMANTSD +// Supported forms : (3 forms) +// +// * VGETMANTSD imm8, m64, xmm, xmm{k}{z} [AVX512F] +// * VGETMANTSD imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VGETMANTSD imm8, xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VGETMANTSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGETMANTSD", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VGETMANTSD", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VGETMANTSD takes 4 or 5 operands") + } + // VGETMANTSD imm8, m64, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x27) + m.mrsd(lcode(v[3]), addr(v[1]), 8) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTSD imm8, {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0xfd ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x27) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTSD imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x27) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VGETMANTSD") + } + return p +} + +// VGETMANTSS performs "Extract Normalized Mantissa from Scalar Single-Precision Floating-Point Value". 
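+//
+// A hedged usage sketch of the scalar form (p is a *Program; XMM constants are
+// assumed from this package; imm8 carries the same interval/sign-control fields
+// as the packed variants):
+//
+//     p.VGETMANTSS(0x00, XMM1, XMM2, XMM3)    // imm8, xmm, xmm, xmm{k}{z}: mantissa of XMM1's low float into XMM3, upper elements from XMM2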
+// +// Mnemonic : VGETMANTSS +// Supported forms : (3 forms) +// +// * VGETMANTSS imm8, m32, xmm, xmm{k}{z} [AVX512F] +// * VGETMANTSS imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VGETMANTSS imm8, xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VGETMANTSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGETMANTSS", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VGETMANTSS", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VGETMANTSS takes 4 or 5 operands") + } + // VGETMANTSS imm8, m32, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x27) + m.mrsd(lcode(v[3]), addr(v[1]), 4) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTSS imm8, {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0x7d ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x27) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTSS imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x27) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VGETMANTSS") + } + return p +} + +// VHADDPD performs "Packed Double-FP Horizontal Add". 
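+//
+// A hedged usage sketch (p is a *Program; XMM constants assumed). With the
+// AT&T-style ordering used here the destination comes last: the low lane of
+// XMM3 receives XMM2[0]+XMM2[1] and the high lane receives XMM1[0]+XMM1[1]:
+//
+//     p.VHADDPD(XMM1, XMM2, XMM3)    // xmm, xmm, xmm form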
+// +// Mnemonic : VHADDPD +// Supported forms : (4 forms) +// +// * VHADDPD xmm, xmm, xmm [AVX] +// * VHADDPD m128, xmm, xmm [AVX] +// * VHADDPD ymm, ymm, ymm [AVX] +// * VHADDPD m256, ymm, ymm [AVX] +// +func (self *Program) VHADDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VHADDPD", 3, Operands { v0, v1, v2 }) + // VHADDPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VHADDPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x7c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VHADDPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VHADDPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x7c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VHADDPD") + } + return p +} + +// VHADDPS performs "Packed Single-FP Horizontal Add". +// +// Mnemonic : VHADDPS +// Supported forms : (4 forms) +// +// * VHADDPS xmm, xmm, xmm [AVX] +// * VHADDPS m128, xmm, xmm [AVX] +// * VHADDPS ymm, ymm, ymm [AVX] +// * VHADDPS m256, ymm, ymm [AVX] +// +func (self *Program) VHADDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VHADDPS", 3, Operands { v0, v1, v2 }) + // VHADDPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VHADDPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x7c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VHADDPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VHADDPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x7c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VHADDPS") + } + return p +} + +// VHSUBPD performs "Packed Double-FP Horizontal Subtract". 
+// +// Mnemonic : VHSUBPD +// Supported forms : (4 forms) +// +// * VHSUBPD xmm, xmm, xmm [AVX] +// * VHSUBPD m128, xmm, xmm [AVX] +// * VHSUBPD ymm, ymm, ymm [AVX] +// * VHSUBPD m256, ymm, ymm [AVX] +// +func (self *Program) VHSUBPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VHSUBPD", 3, Operands { v0, v1, v2 }) + // VHSUBPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VHSUBPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VHSUBPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VHSUBPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VHSUBPD") + } + return p +} + +// VHSUBPS performs "Packed Single-FP Horizontal Subtract". +// +// Mnemonic : VHSUBPS +// Supported forms : (4 forms) +// +// * VHSUBPS xmm, xmm, xmm [AVX] +// * VHSUBPS m128, xmm, xmm [AVX] +// * VHSUBPS ymm, ymm, ymm [AVX] +// * VHSUBPS m256, ymm, ymm [AVX] +// +func (self *Program) VHSUBPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VHSUBPS", 3, Operands { v0, v1, v2 }) + // VHSUBPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VHSUBPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VHSUBPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VHSUBPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VHSUBPS") + } + return p +} + +// VINSERTF128 performs "Insert Packed Floating-Point Values". 
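+//
+// A hedged usage sketch (p is a *Program; XMM/YMM constants assumed). The
+// immediate's low bit selects which 128-bit half of the destination receives
+// the source:
+//
+//     p.VINSERTF128(1, XMM1, YMM2, YMM3)    // YMM3 = YMM2 with its upper half replaced by XMM1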
+// +// Mnemonic : VINSERTF128 +// Supported forms : (2 forms) +// +// * VINSERTF128 imm8, xmm, ymm, ymm [AVX] +// * VINSERTF128 imm8, m128, ymm, ymm [AVX] +// +func (self *Program) VINSERTF128(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTF128", 4, Operands { v0, v1, v2, v3 }) + // VINSERTF128 imm8, xmm, ymm, ymm + if isImm8(v0) && isXMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x18) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF128 imm8, m128, ymm, ymm + if isImm8(v0) && isM128(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x18) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTF128") + } + return p +} + +// VINSERTF32X4 performs "Insert 128 Bits of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VINSERTF32X4 +// Supported forms : (4 forms) +// +// * VINSERTF32X4 imm8, xmm, zmm, zmm{k}{z} [AVX512F] +// * VINSERTF32X4 imm8, m128, zmm, zmm{k}{z} [AVX512F] +// * VINSERTF32X4 imm8, xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VINSERTF32X4 imm8, m128, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VINSERTF32X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTF32X4", 4, Operands { v0, v1, v2, v3 }) + // VINSERTF32X4 imm8, xmm, zmm, zmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x18) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF32X4 imm8, m128, zmm, zmm{k}{z} + if isImm8(v0) && isM128(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x18) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF32X4 imm8, xmm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x18) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF32X4 imm8, m128, ymm, ymm{k}{z} + if isImm8(v0) && isM128(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x18) + m.mrsd(lcode(v[3]), addr(v[1]), 
16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTF32X4") + } + return p +} + +// VINSERTF32X8 performs "Insert 256 Bits of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VINSERTF32X8 +// Supported forms : (2 forms) +// +// * VINSERTF32X8 imm8, ymm, zmm, zmm{k}{z} [AVX512DQ] +// * VINSERTF32X8 imm8, m256, zmm, zmm{k}{z} [AVX512DQ] +// +func (self *Program) VINSERTF32X8(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTF32X8", 4, Operands { v0, v1, v2, v3 }) + // VINSERTF32X8 imm8, ymm, zmm, zmm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x1a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF32X8 imm8, m256, zmm, zmm{k}{z} + if isImm8(v0) && isM256(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x1a) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTF32X8") + } + return p +} + +// VINSERTF64X2 performs "Insert 128 Bits of Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VINSERTF64X2 +// Supported forms : (4 forms) +// +// * VINSERTF64X2 imm8, xmm, zmm, zmm{k}{z} [AVX512DQ] +// * VINSERTF64X2 imm8, m128, zmm, zmm{k}{z} [AVX512DQ] +// * VINSERTF64X2 imm8, xmm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VINSERTF64X2 imm8, m128, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VINSERTF64X2(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTF64X2", 4, Operands { v0, v1, v2, v3 }) + // VINSERTF64X2 imm8, xmm, zmm, zmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x18) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF64X2 imm8, m128, zmm, zmm{k}{z} + if isImm8(v0) && isM128(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x18) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF64X2 imm8, xmm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x18) + m.emit(0xc0 | lcode(v[3]) << 3 | 
lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF64X2 imm8, m128, ymm, ymm{k}{z} + if isImm8(v0) && isM128(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x18) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTF64X2") + } + return p +} + +// VINSERTF64X4 performs "Insert 256 Bits of Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VINSERTF64X4 +// Supported forms : (2 forms) +// +// * VINSERTF64X4 imm8, ymm, zmm, zmm{k}{z} [AVX512F] +// * VINSERTF64X4 imm8, m256, zmm, zmm{k}{z} [AVX512F] +// +func (self *Program) VINSERTF64X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTF64X4", 4, Operands { v0, v1, v2, v3 }) + // VINSERTF64X4 imm8, ymm, zmm, zmm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x1a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF64X4 imm8, m256, zmm, zmm{k}{z} + if isImm8(v0) && isM256(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x1a) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTF64X4") + } + return p +} + +// VINSERTI128 performs "Insert Packed Integer Values". +// +// Mnemonic : VINSERTI128 +// Supported forms : (2 forms) +// +// * VINSERTI128 imm8, xmm, ymm, ymm [AVX2] +// * VINSERTI128 imm8, m128, ymm, ymm [AVX2] +// +func (self *Program) VINSERTI128(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTI128", 4, Operands { v0, v1, v2, v3 }) + // VINSERTI128 imm8, xmm, ymm, ymm + if isImm8(v0) && isXMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x38) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI128 imm8, m128, ymm, ymm + if isImm8(v0) && isM128(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x38) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTI128") + } + return p +} + +// VINSERTI32X4 performs "Insert 128 Bits of Packed Doubleword Integer Values". 
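+//
+// A hedged usage sketch (p is a *Program; XMM/ZMM constants assumed). For the
+// zmm forms the immediate's low two bits pick one of four 128-bit lanes:
+//
+//     p.VINSERTI32X4(3, XMM1, ZMM2, ZMM3)    // ZMM3 = ZMM2 with its top lane replaced by XMM1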
+// +// Mnemonic : VINSERTI32X4 +// Supported forms : (4 forms) +// +// * VINSERTI32X4 imm8, xmm, zmm, zmm{k}{z} [AVX512F] +// * VINSERTI32X4 imm8, m128, zmm, zmm{k}{z} [AVX512F] +// * VINSERTI32X4 imm8, xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VINSERTI32X4 imm8, m128, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VINSERTI32X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTI32X4", 4, Operands { v0, v1, v2, v3 }) + // VINSERTI32X4 imm8, xmm, zmm, zmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x38) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI32X4 imm8, m128, zmm, zmm{k}{z} + if isImm8(v0) && isM128(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x38) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI32X4 imm8, xmm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x38) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI32X4 imm8, m128, ymm, ymm{k}{z} + if isImm8(v0) && isM128(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x38) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTI32X4") + } + return p +} + +// VINSERTI32X8 performs "Insert 256 Bits of Packed Doubleword Integer Values". 
+// +// Mnemonic : VINSERTI32X8 +// Supported forms : (2 forms) +// +// * VINSERTI32X8 imm8, ymm, zmm, zmm{k}{z} [AVX512DQ] +// * VINSERTI32X8 imm8, m256, zmm, zmm{k}{z} [AVX512DQ] +// +func (self *Program) VINSERTI32X8(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTI32X8", 4, Operands { v0, v1, v2, v3 }) + // VINSERTI32X8 imm8, ymm, zmm, zmm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI32X8 imm8, m256, zmm, zmm{k}{z} + if isImm8(v0) && isM256(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x3a) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTI32X8") + } + return p +} + +// VINSERTI64X2 performs "Insert 128 Bits of Packed Quadword Integer Values". +// +// Mnemonic : VINSERTI64X2 +// Supported forms : (4 forms) +// +// * VINSERTI64X2 imm8, xmm, zmm, zmm{k}{z} [AVX512DQ] +// * VINSERTI64X2 imm8, m128, zmm, zmm{k}{z} [AVX512DQ] +// * VINSERTI64X2 imm8, xmm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VINSERTI64X2 imm8, m128, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VINSERTI64X2(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTI64X2", 4, Operands { v0, v1, v2, v3 }) + // VINSERTI64X2 imm8, xmm, zmm, zmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x38) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI64X2 imm8, m128, zmm, zmm{k}{z} + if isImm8(v0) && isM128(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x38) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI64X2 imm8, xmm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x38) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI64X2 imm8, m128, ymm, ymm{k}{z} + if isImm8(v0) && isM128(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + 
p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x38) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTI64X2") + } + return p +} + +// VINSERTI64X4 performs "Insert 256 Bits of Packed Quadword Integer Values". +// +// Mnemonic : VINSERTI64X4 +// Supported forms : (2 forms) +// +// * VINSERTI64X4 imm8, ymm, zmm, zmm{k}{z} [AVX512F] +// * VINSERTI64X4 imm8, m256, zmm, zmm{k}{z} [AVX512F] +// +func (self *Program) VINSERTI64X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTI64X4", 4, Operands { v0, v1, v2, v3 }) + // VINSERTI64X4 imm8, ymm, zmm, zmm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI64X4 imm8, m256, zmm, zmm{k}{z} + if isImm8(v0) && isM256(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x3a) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTI64X4") + } + return p +} + +// VINSERTPS performs "Insert Packed Single Precision Floating-Point Value". 
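+//
+// A hedged usage sketch (p is a *Program; XMM constants assumed). Per the Intel
+// SDM, for register sources imm8 bits 7:6 select the source element, bits 5:4
+// the destination element, and bits 3:0 a zero mask:
+//
+//     p.VINSERTPS(0x10, XMM1, XMM2, XMM3)    // XMM3 = XMM2 with element 1 set to XMM1[0]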
+// +// Mnemonic : VINSERTPS +// Supported forms : (4 forms) +// +// * VINSERTPS imm8, xmm, xmm, xmm [AVX] +// * VINSERTPS imm8, m32, xmm, xmm [AVX] +// * VINSERTPS imm8, xmm, xmm, xmm [AVX512F] +// * VINSERTPS imm8, m32, xmm, xmm [AVX512F] +// +func (self *Program) VINSERTPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTPS", 4, Operands { v0, v1, v2, v3 }) + // VINSERTPS imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x21) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTPS imm8, m32, xmm, xmm + if isImm8(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x21) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTPS imm8, xmm, xmm, xmm + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00) + m.emit(0x21) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTPS imm8, m32, xmm, xmm + if isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x21) + m.mrsd(lcode(v[3]), addr(v[1]), 4) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTPS") + } + return p +} + +// VLDDQU performs "Load Unaligned Integer 128 Bits". +// +// Mnemonic : VLDDQU +// Supported forms : (2 forms) +// +// * VLDDQU m128, xmm [AVX] +// * VLDDQU m256, ymm [AVX] +// +func (self *Program) VLDDQU(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VLDDQU", 2, Operands { v0, v1 }) + // VLDDQU m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), addr(v[0]), 0) + m.emit(0xf0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VLDDQU m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[1]), addr(v[0]), 0) + m.emit(0xf0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VLDDQU") + } + return p +} + +// VLDMXCSR performs "Load MXCSR Register". 
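+//
+// A hedged usage sketch. VLDMXCSR takes only a 32-bit memory operand; Ptr below
+// is a hypothetical stand-in for however this package constructs memory
+// operands, not a verified API:
+//
+//     p.VLDMXCSR(Ptr(RAX, 0))    // load MXCSR from [RAX]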
+// +// Mnemonic : VLDMXCSR +// Supported forms : (1 form) +// +// * VLDMXCSR m32 [AVX] +// +func (self *Program) VLDMXCSR(v0 interface{}) *Instruction { + p := self.alloc("VLDMXCSR", 1, Operands { v0 }) + // VLDMXCSR m32 + if isM32(v0) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, addr(v[0]), 0) + m.emit(0xae) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VLDMXCSR") + } + return p +} + +// VMASKMOVDQU performs "Store Selected Bytes of Double Quadword". +// +// Mnemonic : VMASKMOVDQU +// Supported forms : (1 form) +// +// * VMASKMOVDQU xmm, xmm [AVX] +// +func (self *Program) VMASKMOVDQU(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMASKMOVDQU", 2, Operands { v0, v1 }) + // VMASKMOVDQU xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMASKMOVDQU") + } + return p +} + +// VMASKMOVPD performs "Conditional Move Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VMASKMOVPD +// Supported forms : (4 forms) +// +// * VMASKMOVPD m128, xmm, xmm [AVX] +// * VMASKMOVPD m256, ymm, ymm [AVX] +// * VMASKMOVPD xmm, xmm, m128 [AVX] +// * VMASKMOVPD ymm, ymm, m256 [AVX] +// +func (self *Program) VMASKMOVPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VMASKMOVPD", 3, Operands { v0, v1, v2 }) + // VMASKMOVPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMASKMOVPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMASKMOVPD xmm, xmm, m128 + if isXMM(v0) && isXMM(v1) && isM128(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[0]), addr(v[2]), hlcode(v[1])) + m.emit(0x2f) + m.mrsd(lcode(v[0]), addr(v[2]), 1) + }) + } + // VMASKMOVPD ymm, ymm, m256 + if isYMM(v0) && isYMM(v1) && isM256(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[0]), addr(v[2]), hlcode(v[1])) + m.emit(0x2f) + m.mrsd(lcode(v[0]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VMASKMOVPD") + } + return p +} + +// VMASKMOVPS performs "Conditional Move Packed Single-Precision Floating-Point Values". 
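+//
+// A hedged usage sketch. The same mnemonic covers both directions: with memory
+// first it is a masked load, with memory last a masked store, each lane guarded
+// by the sign bit of the corresponding mask element (Ptr is a hypothetical
+// memory-operand constructor, not a verified API):
+//
+//     p.VMASKMOVPS(Ptr(RAX, 0), XMM1, XMM2)    // masked load into XMM2, mask in XMM1
+//     p.VMASKMOVPS(XMM2, XMM1, Ptr(RAX, 0))    // masked store from XMM2, mask in XMM1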
+// +// Mnemonic : VMASKMOVPS +// Supported forms : (4 forms) +// +// * VMASKMOVPS m128, xmm, xmm [AVX] +// * VMASKMOVPS m256, ymm, ymm [AVX] +// * VMASKMOVPS xmm, xmm, m128 [AVX] +// * VMASKMOVPS ymm, ymm, m256 [AVX] +// +func (self *Program) VMASKMOVPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VMASKMOVPS", 3, Operands { v0, v1, v2 }) + // VMASKMOVPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMASKMOVPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMASKMOVPS xmm, xmm, m128 + if isXMM(v0) && isXMM(v1) && isM128(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[0]), addr(v[2]), hlcode(v[1])) + m.emit(0x2e) + m.mrsd(lcode(v[0]), addr(v[2]), 1) + }) + } + // VMASKMOVPS ymm, ymm, m256 + if isYMM(v0) && isYMM(v1) && isM256(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[0]), addr(v[2]), hlcode(v[1])) + m.emit(0x2e) + m.mrsd(lcode(v[0]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VMASKMOVPS") + } + return p +} + +// VMAXPD performs "Return Maximum Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VMAXPD +// Supported forms : (11 forms) +// +// * VMAXPD xmm, xmm, xmm [AVX] +// * VMAXPD m128, xmm, xmm [AVX] +// * VMAXPD ymm, ymm, ymm [AVX] +// * VMAXPD m256, ymm, ymm [AVX] +// * VMAXPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VMAXPD {sae}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VMAXPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VMAXPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMAXPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMAXPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMAXPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMAXPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMAXPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMAXPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMAXPD takes 3 or 4 operands") + } + // VMAXPD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXPD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMAXPD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // 
VMAXPD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMAXPD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VMAXPD {sae}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMAXPD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXPD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VMAXPD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXPD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VMAXPD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMAXPD") + } + return p +} + +// VMAXPS performs "Return Maximum 
Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VMAXPS +// Supported forms : (11 forms) +// +// * VMAXPS xmm, xmm, xmm [AVX] +// * VMAXPS m128, xmm, xmm [AVX] +// * VMAXPS ymm, ymm, ymm [AVX] +// * VMAXPS m256, ymm, ymm [AVX] +// * VMAXPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VMAXPS {sae}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VMAXPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VMAXPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMAXPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMAXPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMAXPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMAXPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMAXPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMAXPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMAXPS takes 3 or 4 operands") + } + // VMAXPS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXPS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMAXPS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXPS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMAXPS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VMAXPS {sae}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMAXPS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXPS m128/m32bcst, xmm, xmm{k}{z} 
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VMAXPS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXPS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VMAXPS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMAXPS") + } + return p +} + +// VMAXSD performs "Return Maximum Scalar Double-Precision Floating-Point Value". 
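+//
+// A hedged usage sketch (p is a *Program; XMM constants assumed). The trailing
+// variadic operand enables the four-operand {sae} form, whose suppress-all-
+// exceptions marker is a package-specific value not shown here:
+//
+//     p.VMAXSD(XMM1, XMM2, XMM3)    // xmm, xmm, xmm form: max of the low doubles into XMM3, upper bits from XMM2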
+// +// Mnemonic : VMAXSD +// Supported forms : (5 forms) +// +// * VMAXSD xmm, xmm, xmm [AVX] +// * VMAXSD m64, xmm, xmm [AVX] +// * VMAXSD m64, xmm, xmm{k}{z} [AVX512F] +// * VMAXSD {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VMAXSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VMAXSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMAXSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMAXSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMAXSD takes 3 or 4 operands") + } + // VMAXSD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXSD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMAXSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VMAXSD {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMAXSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMAXSD") + } + return p +} + +// VMAXSS performs "Return Maximum Scalar Single-Precision Floating-Point Value". 
+// +// Mnemonic : VMAXSS +// Supported forms : (5 forms) +// +// * VMAXSS xmm, xmm, xmm [AVX] +// * VMAXSS m32, xmm, xmm [AVX] +// * VMAXSS m32, xmm, xmm{k}{z} [AVX512F] +// * VMAXSS {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VMAXSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VMAXSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMAXSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMAXSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMAXSS takes 3 or 4 operands") + } + // VMAXSS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXSS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMAXSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VMAXSS {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMAXSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMAXSS") + } + return p +} + +// VMINPD performs "Return Minimum Packed Double-Precision Floating-Point Values". 
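+//
+// Editorial note (illustrative): the {k}{z} suffix below marks an optional
+// opmask/zeroing destination; a plain register is accepted there as well, so
+// an unmasked 512-bit minimum can be emitted as:
+//
+//     p.VMINPD(ZMM0, ZMM1, ZMM2)   // Intel syntax: VMINPD ZMM2, ZMM1, ZMM0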
+// +// Mnemonic : VMINPD +// Supported forms : (11 forms) +// +// * VMINPD xmm, xmm, xmm [AVX] +// * VMINPD m128, xmm, xmm [AVX] +// * VMINPD ymm, ymm, ymm [AVX] +// * VMINPD m256, ymm, ymm [AVX] +// * VMINPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VMINPD {sae}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VMINPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VMINPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMINPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMINPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMINPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMINPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMINPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMINPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMINPD takes 3 or 4 operands") + } + // VMINPD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINPD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMINPD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINPD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMINPD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VMINPD {sae}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMINPD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINPD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && 
isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VMINPD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINPD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VMINPD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMINPD") + } + return p +} + +// VMINPS performs "Return Minimum Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VMINPS +// Supported forms : (11 forms) +// +// * VMINPS xmm, xmm, xmm [AVX] +// * VMINPS m128, xmm, xmm [AVX] +// * VMINPS ymm, ymm, ymm [AVX] +// * VMINPS m256, ymm, ymm [AVX] +// * VMINPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VMINPS {sae}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VMINPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VMINPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMINPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMINPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMINPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMINPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMINPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMINPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMINPS takes 3 or 4 operands") + } + // VMINPS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINPS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMINPS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINPS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMINPS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VMINPS {sae}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMINPS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINPS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && 
isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VMINPS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINPS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VMINPS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMINPS") + } + return p +} + +// VMINSD performs "Return Minimum Scalar Double-Precision Floating-Point Value". 
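+//
+// Editorial note (illustrative): the {sae} form takes a fourth operand via the
+// variadic tail, with the suppress-all-exceptions marker passed first. Assuming
+// the package exposes that marker as SAE:
+//
+//     p.VMINSD(SAE, XMM1, XMM2, XMM3)   // VMINSD XMM3, XMM2, XMM1 with {sae}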
+// +// Mnemonic : VMINSD +// Supported forms : (5 forms) +// +// * VMINSD xmm, xmm, xmm [AVX] +// * VMINSD m64, xmm, xmm [AVX] +// * VMINSD m64, xmm, xmm{k}{z} [AVX512F] +// * VMINSD {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VMINSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VMINSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMINSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMINSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMINSD takes 3 or 4 operands") + } + // VMINSD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINSD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMINSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VMINSD {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMINSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMINSD") + } + return p +} + +// VMINSS performs "Return Minimum Scalar Single-Precision Floating-Point Value". 
+// +// Mnemonic : VMINSS +// Supported forms : (5 forms) +// +// * VMINSS xmm, xmm, xmm [AVX] +// * VMINSS m32, xmm, xmm [AVX] +// * VMINSS m32, xmm, xmm{k}{z} [AVX512F] +// * VMINSS {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VMINSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VMINSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMINSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMINSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMINSS takes 3 or 4 operands") + } + // VMINSS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINSS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMINSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VMINSS {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMINSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMINSS") + } + return p +} + +// VMOVAPD performs "Move Aligned Packed Double-Precision Floating-Point Values". 
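+//
+// Editorial note (illustrative): as with the arithmetic instructions above, the
+// destination comes last, so a register-to-register copy reads:
+//
+//     p.VMOVAPD(XMM0, XMM1)   // Intel syntax: VMOVAPD XMM1, XMM0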
+// +// Mnemonic : VMOVAPD +// Supported forms : (15 forms) +// +// * VMOVAPD xmm, xmm [AVX] +// * VMOVAPD m128, xmm [AVX] +// * VMOVAPD ymm, ymm [AVX] +// * VMOVAPD m256, ymm [AVX] +// * VMOVAPD xmm, m128 [AVX] +// * VMOVAPD ymm, m256 [AVX] +// * VMOVAPD zmm, m512{k}{z} [AVX512F] +// * VMOVAPD zmm, zmm{k}{z} [AVX512F] +// * VMOVAPD m512, zmm{k}{z} [AVX512F] +// * VMOVAPD xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VMOVAPD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVAPD ymm, m256{k}{z} [AVX512F,AVX512VL] +// * VMOVAPD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVAPD m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVAPD m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVAPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVAPD", 2, Operands { v0, v1 }) + // VMOVAPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), v[1], 0) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVAPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVAPD ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[0]), v[1], 0) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVAPD m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), addr(v[0]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVAPD xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVAPD ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[0]), addr(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVAPD zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVAPD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 
3 | lcode(v[1])) + }) + } + // VMOVAPD m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVAPD xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVAPD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVAPD ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVAPD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVAPD m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVAPD m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVAPD") + } + return p +} + +// VMOVAPS performs "Move Aligned Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VMOVAPS +// Supported forms : (15 forms) +// +// * VMOVAPS xmm, xmm [AVX] +// * VMOVAPS m128, xmm [AVX] +// * VMOVAPS ymm, ymm [AVX] +// * VMOVAPS m256, ymm [AVX] +// * VMOVAPS xmm, m128 [AVX] +// * VMOVAPS ymm, m256 [AVX] +// * VMOVAPS zmm, m512{k}{z} [AVX512F] +// * VMOVAPS zmm, zmm{k}{z} [AVX512F] +// * VMOVAPS m512, zmm{k}{z} [AVX512F] +// * VMOVAPS xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VMOVAPS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVAPS ymm, m256{k}{z} [AVX512F,AVX512VL] +// * VMOVAPS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVAPS m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVAPS m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVAPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVAPS", 2, Operands { v0, v1 }) + // VMOVAPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[0]), v[1], 0) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVAPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), addr(v[0]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVAPS ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), v[0], 0) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[0]), v[1], 0) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVAPS m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), addr(v[0]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVAPS xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[0]), addr(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVAPS ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[0]), addr(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVAPS zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVAPS zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 
3 | lcode(v[1])) + }) + } + // VMOVAPS m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVAPS xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVAPS xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVAPS ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVAPS ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVAPS m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVAPS m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVAPS") + } + return p +} + +// VMOVD performs "Move Doubleword". 
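+//
+// Editorial note (illustrative): VMOVD moves 32 bits between a general-purpose
+// register (or a 32-bit memory location) and the low dword of an XMM register,
+// destination last:
+//
+//     p.VMOVD(EAX, XMM0)   // Intel syntax: VMOVD XMM0, EAX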
+// +// Mnemonic : VMOVD +// Supported forms : (8 forms) +// +// * VMOVD xmm, r32 [AVX] +// * VMOVD r32, xmm [AVX] +// * VMOVD m32, xmm [AVX] +// * VMOVD xmm, m32 [AVX] +// * VMOVD xmm, r32 [AVX512F] +// * VMOVD r32, xmm [AVX512F] +// * VMOVD m32, xmm [AVX512F] +// * VMOVD xmm, m32 [AVX512F] +// +func (self *Program) VMOVD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVD", 2, Operands { v0, v1 }) + // VMOVD xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), v[1], 0) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVD r32, xmm + if isReg32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x6e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVD xmm, m32 + if isXMM(v0) && isM32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + m.emit(0x7e) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVD xmm, r32 + if isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit(0x08) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVD r32, xmm + if isReg32(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit(0x08) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVD m32, xmm + if isM32(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x6e) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VMOVD xmm, m32 + if isEVEXXMM(v0) && isM32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x7e) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVD") + } + return p +} + +// VMOVDDUP performs "Move One Double-FP and Duplicate". 
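+//
+// Editorial note (illustrative): VMOVDDUP duplicates the low double-precision
+// element of the source into every 64-bit lane of the destination:
+//
+//     p.VMOVDDUP(XMM1, XMM0)   // XMM0[63:0] = XMM0[127:64] = XMM1[63:0]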
+// +// Mnemonic : VMOVDDUP +// Supported forms : (10 forms) +// +// * VMOVDDUP xmm, xmm [AVX] +// * VMOVDDUP m64, xmm [AVX] +// * VMOVDDUP ymm, ymm [AVX] +// * VMOVDDUP m256, ymm [AVX] +// * VMOVDDUP zmm, zmm{k}{z} [AVX512F] +// * VMOVDDUP m512, zmm{k}{z} [AVX512F] +// * VMOVDDUP xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDDUP ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVDDUP m64, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDDUP m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVDDUP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDDUP", 2, Operands { v0, v1 }) + // VMOVDDUP xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), v[0], 0) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVDDUP m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), addr(v[0]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVDDUP ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[1]), v[0], 0) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVDDUP m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[1]), addr(v[0]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVDDUP zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVDDUP m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVDDUP xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVDDUP ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVDDUP m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VMOVDDUP m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL 
| ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDDUP") + } + return p +} + +// VMOVDQA performs "Move Aligned Double Quadword". +// +// Mnemonic : VMOVDQA +// Supported forms : (6 forms) +// +// * VMOVDQA xmm, xmm [AVX] +// * VMOVDQA m128, xmm [AVX] +// * VMOVDQA ymm, ymm [AVX] +// * VMOVDQA m256, ymm [AVX] +// * VMOVDQA xmm, m128 [AVX] +// * VMOVDQA ymm, m256 [AVX] +// +func (self *Program) VMOVDQA(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDQA", 2, Operands { v0, v1 }) + // VMOVDQA xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), v[1], 0) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQA m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVDQA ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[0]), v[1], 0) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQA m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), addr(v[0]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVDQA xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVDQA ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[0]), addr(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDQA") + } + return p +} + +// VMOVDQA32 performs "Move Aligned Doubleword Values". 
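+//
+// Editorial note: unlike the VEX-only VMOVDQA above, the EVEX-encoded
+// VMOVDQA32/VMOVDQA64 variants name an element width solely so that {k}{z}
+// masking can apply per 32- or 64-bit element; an unmasked move is otherwise
+// equivalent:
+//
+//     p.VMOVDQA32(ZMM0, ZMM1)   // Intel syntax: VMOVDQA32 ZMM1, ZMM0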
+// +// Mnemonic : VMOVDQA32 +// Supported forms : (9 forms) +// +// * VMOVDQA32 zmm, m512{k}{z} [AVX512F] +// * VMOVDQA32 zmm, zmm{k}{z} [AVX512F] +// * VMOVDQA32 m512, zmm{k}{z} [AVX512F] +// * VMOVDQA32 xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA32 xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA32 ymm, m256{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA32 ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA32 m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA32 m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVDQA32(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDQA32", 2, Operands { v0, v1 }) + // VMOVDQA32 zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVDQA32 zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQA32 m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVDQA32 xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVDQA32 xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQA32 ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVDQA32 ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 
4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQA32 m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVDQA32 m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDQA32") + } + return p +} + +// VMOVDQA64 performs "Move Aligned Quadword Values". +// +// Mnemonic : VMOVDQA64 +// Supported forms : (9 forms) +// +// * VMOVDQA64 zmm, m512{k}{z} [AVX512F] +// * VMOVDQA64 zmm, zmm{k}{z} [AVX512F] +// * VMOVDQA64 m512, zmm{k}{z} [AVX512F] +// * VMOVDQA64 xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA64 xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA64 ymm, m256{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA64 ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA64 m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA64 m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVDQA64(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDQA64", 2, Operands { v0, v1 }) + // VMOVDQA64 zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVDQA64 zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQA64 m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVDQA64 xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVDQA64 xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + 
self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQA64 ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVDQA64 ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQA64 m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVDQA64 m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDQA64") + } + return p +} + +// VMOVDQU performs "Move Unaligned Double Quadword". 
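+//
+// Editorial note: VMOVDQU is the unaligned counterpart of VMOVDQA; the aligned
+// form raises #GP when a memory operand is not 16/32-byte aligned, while
+// VMOVDQU accepts any address. Register copies look the same either way:
+//
+//     p.VMOVDQU(XMM0, XMM1)   // Intel syntax: VMOVDQU XMM1, XMM0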
+// +// Mnemonic : VMOVDQU +// Supported forms : (6 forms) +// +// * VMOVDQU xmm, xmm [AVX] +// * VMOVDQU m128, xmm [AVX] +// * VMOVDQU ymm, ymm [AVX] +// * VMOVDQU m256, ymm [AVX] +// * VMOVDQU xmm, m128 [AVX] +// * VMOVDQU ymm, m256 [AVX] +// +func (self *Program) VMOVDQU(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDQU", 2, Operands { v0, v1 }) + // VMOVDQU xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), v[0], 0) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[0]), v[1], 0) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVDQU ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), v[0], 0) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[0]), v[1], 0) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), addr(v[0]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVDQU xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[0]), addr(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVDQU ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[0]), addr(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDQU") + } + return p +} + +// VMOVDQU16 performs "Move Unaligned Word Values". 
+// +// Mnemonic : VMOVDQU16 +// Supported forms : (9 forms) +// +// * VMOVDQU16 zmm, m512{k}{z} [AVX512BW] +// * VMOVDQU16 zmm, zmm{k}{z} [AVX512BW] +// * VMOVDQU16 m512, zmm{k}{z} [AVX512BW] +// * VMOVDQU16 xmm, m128{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU16 xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU16 ymm, m256{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU16 ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU16 m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU16 m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VMOVDQU16(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDQU16", 2, Operands { v0, v1 }) + // VMOVDQU16 zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVDQU16 zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU16 m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVDQU16 xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVDQU16 xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU16 ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVDQU16 ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU16 m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVDQU16 m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDQU16") + } + return p +} + +// VMOVDQU32 performs "Move Unaligned Doubleword Values". +// +// Mnemonic : VMOVDQU32 +// Supported forms : (9 forms) +// +// * VMOVDQU32 zmm, m512{k}{z} [AVX512F] +// * VMOVDQU32 zmm, zmm{k}{z} [AVX512F] +// * VMOVDQU32 m512, zmm{k}{z} [AVX512F] +// * VMOVDQU32 xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU32 xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU32 ymm, m256{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU32 ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU32 m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU32 m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVDQU32(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDQU32", 2, Operands { v0, v1 }) + // VMOVDQU32 zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVDQU32 zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU32 m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVDQU32 xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVDQU32 xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + 
self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU32 ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVDQU32 ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU32 m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVDQU32 m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDQU32") + } + return p +} + +// VMOVDQU64 performs "Move Unaligned Quadword Values". 
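+// Usage sketch (same hypothetical driver as for VMOVDQU16 above):
+//
+//     p.VMOVDQU64(ZMM3, ZMM2)   // quadword-granular unaligned move; the
+//                               // source comes first, the destination last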
+// +// Mnemonic : VMOVDQU64 +// Supported forms : (9 forms) +// +// * VMOVDQU64 zmm, m512{k}{z} [AVX512F] +// * VMOVDQU64 zmm, zmm{k}{z} [AVX512F] +// * VMOVDQU64 m512, zmm{k}{z} [AVX512F] +// * VMOVDQU64 xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU64 xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU64 ymm, m256{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU64 ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU64 m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU64 m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVDQU64(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDQU64", 2, Operands { v0, v1 }) + // VMOVDQU64 zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVDQU64 zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU64 m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVDQU64 xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVDQU64 xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU64 ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVDQU64 ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 
4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU64 m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVDQU64 m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDQU64") + } + return p +} + +// VMOVDQU8 performs "Move Unaligned Byte Values". +// +// Mnemonic : VMOVDQU8 +// Supported forms : (9 forms) +// +// * VMOVDQU8 zmm, m512{k}{z} [AVX512BW] +// * VMOVDQU8 zmm, zmm{k}{z} [AVX512BW] +// * VMOVDQU8 m512, zmm{k}{z} [AVX512BW] +// * VMOVDQU8 xmm, m128{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU8 xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU8 ymm, m256{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU8 ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU8 m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU8 m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VMOVDQU8(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDQU8", 2, Operands { v0, v1 }) + // VMOVDQU8 zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVDQU8 zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU8 m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVDQU8 xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVDQU8 xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | 
ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU8 ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVDQU8 ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU8 m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVDQU8 m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDQU8") + } + return p +} + +// VMOVHLPS performs "Move Packed Single-Precision Floating-Point Values High to Low". 
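+// Usage sketch (hypothetical driver; note the reversed operand order, with
+// the destination last):
+//
+//     p.VMOVHLPS(XMM2, XMM1, XMM0)  // XMM0.lo = XMM2.hi, XMM0.hi = XMM1.hi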
+// +// Mnemonic : VMOVHLPS +// Supported forms : (2 forms) +// +// * VMOVHLPS xmm, xmm, xmm [AVX] +// * VMOVHLPS xmm, xmm, xmm [AVX512F] +// +func (self *Program) VMOVHLPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VMOVHLPS", 3, Operands { v0, v1, v2 }) + // VMOVHLPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x12) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMOVHLPS xmm, xmm, xmm + if isEVEXXMM(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00) + m.emit(0x12) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVHLPS") + } + return p +} + +// VMOVHPD performs "Move High Packed Double-Precision Floating-Point Value". +// +// Mnemonic : VMOVHPD +// Supported forms : (4 forms) +// +// * VMOVHPD xmm, m64 [AVX] +// * VMOVHPD m64, xmm, xmm [AVX] +// * VMOVHPD xmm, m64 [AVX512F] +// * VMOVHPD m64, xmm, xmm [AVX512F] +// +func (self *Program) VMOVHPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMOVHPD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VMOVHPD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VMOVHPD takes 2 or 3 operands") + } + // VMOVHPD xmm, m64 + if len(vv) == 0 && isXMM(v0) && isM64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + m.emit(0x17) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVHPD m64, xmm, xmm + if len(vv) == 1 && isM64(v0) && isXMM(v1) && isXMM(vv[0]) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMOVHPD xmm, m64 + if len(vv) == 0 && isEVEXXMM(v0) && isM64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x17) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VMOVHPD m64, xmm, xmm + if len(vv) == 1 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVHPD") + } + return p +} + +// VMOVHPS performs "Move High Packed Single-Precision Floating-Point Values". 
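+// Usage sketch (hypothetical driver; Ptr is assumed to build the m64 operand):
+//
+//     p.VMOVHPS(XMM0, Ptr(RDI, 0))        // [RDI] = high 64 bits of XMM0
+//     p.VMOVHPS(Ptr(RSI, 0), XMM1, XMM2)  // XMM2 = XMM1 with its high 64 bits
+//                                         // replaced by the qword at [RSI]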
+// +// Mnemonic : VMOVHPS +// Supported forms : (4 forms) +// +// * VMOVHPS xmm, m64 [AVX] +// * VMOVHPS m64, xmm, xmm [AVX] +// * VMOVHPS xmm, m64 [AVX512F] +// * VMOVHPS m64, xmm, xmm [AVX512F] +// +func (self *Program) VMOVHPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMOVHPS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VMOVHPS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VMOVHPS takes 2 or 3 operands") + } + // VMOVHPS xmm, m64 + if len(vv) == 0 && isXMM(v0) && isM64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[0]), addr(v[1]), 0) + m.emit(0x17) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVHPS m64, xmm, xmm + if len(vv) == 1 && isM64(v0) && isXMM(v1) && isXMM(vv[0]) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMOVHPS xmm, m64 + if len(vv) == 0 && isEVEXXMM(v0) && isM64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x17) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VMOVHPS m64, xmm, xmm + if len(vv) == 1 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVHPS") + } + return p +} + +// VMOVLHPS performs "Move Packed Single-Precision Floating-Point Values Low to High". +// +// Mnemonic : VMOVLHPS +// Supported forms : (2 forms) +// +// * VMOVLHPS xmm, xmm, xmm [AVX] +// * VMOVLHPS xmm, xmm, xmm [AVX512F] +// +func (self *Program) VMOVLHPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VMOVLHPS", 3, Operands { v0, v1, v2 }) + // VMOVLHPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x16) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMOVLHPS xmm, xmm, xmm + if isEVEXXMM(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00) + m.emit(0x16) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVLHPS") + } + return p +} + +// VMOVLPD performs "Move Low Packed Double-Precision Floating-Point Value". 
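+// Usage sketch (hypothetical driver): the 2-operand form stores the low
+// double, the 3-operand form merges a loaded low double over a source:
+//
+//     p.VMOVLPD(XMM0, Ptr(RDI, 0))        // [RDI] = low 64 bits of XMM0
+//     p.VMOVLPD(Ptr(RSI, 0), XMM1, XMM2)  // XMM2 = XMM1 with low qword from [RSI]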
+// +// Mnemonic : VMOVLPD +// Supported forms : (4 forms) +// +// * VMOVLPD xmm, m64 [AVX] +// * VMOVLPD m64, xmm, xmm [AVX] +// * VMOVLPD xmm, m64 [AVX512F] +// * VMOVLPD m64, xmm, xmm [AVX512F] +// +func (self *Program) VMOVLPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMOVLPD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VMOVLPD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VMOVLPD takes 2 or 3 operands") + } + // VMOVLPD xmm, m64 + if len(vv) == 0 && isXMM(v0) && isM64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVLPD m64, xmm, xmm + if len(vv) == 1 && isM64(v0) && isXMM(v1) && isXMM(vv[0]) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x12) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMOVLPD xmm, m64 + if len(vv) == 0 && isEVEXXMM(v0) && isM64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VMOVLPD m64, xmm, xmm + if len(vv) == 1 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x12) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVLPD") + } + return p +} + +// VMOVLPS performs "Move Low Packed Single-Precision Floating-Point Values". 
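+// Usage sketch (hypothetical driver; mirrors VMOVLPD but moves two packed
+// singles instead of one double):
+//
+//     p.VMOVLPS(Ptr(RSI, 0), XMM1, XMM2)  // XMM2 = XMM1 with low qword from [RSI]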
+// +// Mnemonic : VMOVLPS +// Supported forms : (4 forms) +// +// * VMOVLPS xmm, m64 [AVX] +// * VMOVLPS m64, xmm, xmm [AVX] +// * VMOVLPS xmm, m64 [AVX512F] +// * VMOVLPS m64, xmm, xmm [AVX512F] +// +func (self *Program) VMOVLPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMOVLPS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VMOVLPS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VMOVLPS takes 2 or 3 operands") + } + // VMOVLPS xmm, m64 + if len(vv) == 0 && isXMM(v0) && isM64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[0]), addr(v[1]), 0) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVLPS m64, xmm, xmm + if len(vv) == 1 && isM64(v0) && isXMM(v1) && isXMM(vv[0]) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x12) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMOVLPS xmm, m64 + if len(vv) == 0 && isEVEXXMM(v0) && isM64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VMOVLPS m64, xmm, xmm + if len(vv) == 1 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x12) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVLPS") + } + return p +} + +// VMOVMSKPD performs "Extract Packed Double-Precision Floating-Point Sign Mask". +// +// Mnemonic : VMOVMSKPD +// Supported forms : (2 forms) +// +// * VMOVMSKPD xmm, r32 [AVX] +// * VMOVMSKPD ymm, r32 [AVX] +// +func (self *Program) VMOVMSKPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVMSKPD", 2, Operands { v0, v1 }) + // VMOVMSKPD xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x50) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVMSKPD ymm, r32 + if isYMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0x50) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVMSKPD") + } + return p +} + +// VMOVMSKPS performs "Extract Packed Single-Precision Floating-Point Sign Mask". 
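+// Usage sketch (hypothetical driver; EAX is the Register32 constant assumed
+// from this package):
+//
+//     p.VMOVMSKPS(YMM0, EAX)  // EAX = 8-bit mask of the sign bits of YMM0's lanes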
+// +// Mnemonic : VMOVMSKPS +// Supported forms : (2 forms) +// +// * VMOVMSKPS xmm, r32 [AVX] +// * VMOVMSKPS ymm, r32 [AVX] +// +func (self *Program) VMOVMSKPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVMSKPS", 2, Operands { v0, v1 }) + // VMOVMSKPS xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x50) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVMSKPS ymm, r32 + if isYMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), v[0], 0) + m.emit(0x50) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVMSKPS") + } + return p +} + +// VMOVNTDQ performs "Store Double Quadword Using Non-Temporal Hint". +// +// Mnemonic : VMOVNTDQ +// Supported forms : (5 forms) +// +// * VMOVNTDQ xmm, m128 [AVX] +// * VMOVNTDQ ymm, m256 [AVX] +// * VMOVNTDQ zmm, m512 [AVX512F] +// * VMOVNTDQ xmm, m128 [AVX512F,AVX512VL] +// * VMOVNTDQ ymm, m256 [AVX512F,AVX512VL] +// +func (self *Program) VMOVNTDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVNTDQ", 2, Operands { v0, v1 }) + // VMOVNTDQ xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + m.emit(0xe7) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVNTDQ ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[0]), addr(v[1]), 0) + m.emit(0xe7) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVNTDQ zmm, m512 + if isZMM(v0) && isM512(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0xe7) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVNTDQ xmm, m128 + if isEVEXXMM(v0) && isM128(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0xe7) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVNTDQ ymm, m256 + if isEVEXYMM(v0) && isM256(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0xe7) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVNTDQ") + } + return p +} + +// VMOVNTDQA performs "Load Double Quadword Non-Temporal Aligned Hint". 
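+// Usage sketch (hypothetical driver; the memory operand must be 16-byte
+// aligned at run time for the xmm form):
+//
+//     p.VMOVNTDQA(Ptr(RSI, 0), XMM0)  // non-temporal 128-bit load from [RSI]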
+// +// Mnemonic : VMOVNTDQA +// Supported forms : (5 forms) +// +// * VMOVNTDQA m128, xmm [AVX] +// * VMOVNTDQA m256, ymm [AVX2] +// * VMOVNTDQA m512, zmm [AVX512F] +// * VMOVNTDQA m128, xmm [AVX512F,AVX512VL] +// * VMOVNTDQA m256, ymm [AVX512F,AVX512VL] +// +func (self *Program) VMOVNTDQA(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVNTDQA", 2, Operands { v0, v1 }) + // VMOVNTDQA m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVNTDQA m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVNTDQA m512, zmm + if isM512(v0) && isZMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVNTDQA m128, xmm + if isM128(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVNTDQA m256, ymm + if isM256(v0) && isEVEXYMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVNTDQA") + } + return p +} + +// VMOVNTPD performs "Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint". 
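+// Usage sketch (hypothetical driver; non-temporal stores bypass the cache, so
+// the destination should not be re-read soon):
+//
+//     p.VMOVNTPD(YMM0, Ptr(RDI, 0))  // stream 4 packed doubles to [RDI]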
+// +// Mnemonic : VMOVNTPD +// Supported forms : (5 forms) +// +// * VMOVNTPD xmm, m128 [AVX] +// * VMOVNTPD ymm, m256 [AVX] +// * VMOVNTPD zmm, m512 [AVX512F] +// * VMOVNTPD xmm, m128 [AVX512F,AVX512VL] +// * VMOVNTPD ymm, m256 [AVX512F,AVX512VL] +// +func (self *Program) VMOVNTPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVNTPD", 2, Operands { v0, v1 }) + // VMOVNTPD xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVNTPD ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[0]), addr(v[1]), 0) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVNTPD zmm, m512 + if isZMM(v0) && isM512(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVNTPD xmm, m128 + if isEVEXXMM(v0) && isM128(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVNTPD ymm, m256 + if isEVEXYMM(v0) && isM256(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVNTPD") + } + return p +} + +// VMOVNTPS performs "Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint". 
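+// Usage sketch (hypothetical driver):
+//
+//     p.VMOVNTPS(ZMM0, Ptr(RDI, 0))  // stream 16 packed singles to [RDI]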
+//
+// Mnemonic : VMOVNTPS
+// Supported forms : (5 forms)
+//
+//    * VMOVNTPS xmm, m128    [AVX]
+//    * VMOVNTPS ymm, m256    [AVX]
+//    * VMOVNTPS zmm, m512    [AVX512F]
+//    * VMOVNTPS xmm, m128    [AVX512F,AVX512VL]
+//    * VMOVNTPS ymm, m256    [AVX512F,AVX512VL]
+//
+func (self *Program) VMOVNTPS(v0 interface{}, v1 interface{}) *Instruction {
+    p := self.alloc("VMOVNTPS", 2, Operands { v0, v1 })
+    // VMOVNTPS xmm, m128
+    if isXMM(v0) && isM128(v1) {
+        self.require(ISA_AVX)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.vex2(0, hcode(v[0]), addr(v[1]), 0)
+            m.emit(0x2b)
+            m.mrsd(lcode(v[0]), addr(v[1]), 1)
+        })
+    }
+    // VMOVNTPS ymm, m256
+    if isYMM(v0) && isM256(v1) {
+        self.require(ISA_AVX)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.vex2(4, hcode(v[0]), addr(v[1]), 0)
+            m.emit(0x2b)
+            m.mrsd(lcode(v[0]), addr(v[1]), 1)
+        })
+    }
+    // VMOVNTPS zmm, m512
+    if isZMM(v0) && isM512(v1) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b01, 0x04, 0b10, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+            m.emit(0x2b)
+            m.mrsd(lcode(v[0]), addr(v[1]), 64)
+        })
+    }
+    // VMOVNTPS xmm, m128
+    if isEVEXXMM(v0) && isM128(v1) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+            m.emit(0x2b)
+            m.mrsd(lcode(v[0]), addr(v[1]), 16)
+        })
+    }
+    // VMOVNTPS ymm, m256
+    if isEVEXYMM(v0) && isM256(v1) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b01, 0x04, 0b01, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+            m.emit(0x2b)
+            m.mrsd(lcode(v[0]), addr(v[1]), 32)
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VMOVNTPS")
+    }
+    return p
+}
+
+// VMOVQ performs "Move Quadword".
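+// Usage sketch (hypothetical driver; RAX is the Register64 constant assumed
+// from this package):
+//
+//     p.VMOVQ(XMM0, RAX)  // RAX = low 64 bits of XMM0
+//     p.VMOVQ(RAX, XMM1)  // XMM1 = RAX zero-extended to 128 bits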
+// +// Mnemonic : VMOVQ +// Supported forms : (10 forms) +// +// * VMOVQ xmm, r64 [AVX] +// * VMOVQ r64, xmm [AVX] +// * VMOVQ xmm, xmm [AVX] +// * VMOVQ m64, xmm [AVX] +// * VMOVQ xmm, m64 [AVX] +// * VMOVQ xmm, r64 [AVX512F] +// * VMOVQ r64, xmm [AVX512F] +// * VMOVQ xmm, xmm [AVX512F] +// * VMOVQ m64, xmm [AVX512F] +// * VMOVQ xmm, m64 [AVX512F] +// +func (self *Program) VMOVQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVQ", 2, Operands { v0, v1 }) + // VMOVQ xmm, r64 + if isXMM(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[0]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xf9) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVQ r64, xmm + if isReg64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), v[0], 0) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), v[1], 0) + m.emit(0xd6) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVQ m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0x7e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x81, hcode(v[1]), addr(v[0]), 0) + m.emit(0x6e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVQ xmm, m64 + if isXMM(v0) && isM64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + m.emit(0xd6) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x81, hcode(v[0]), addr(v[1]), 0) + m.emit(0x7e) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVQ xmm, r64 + if isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit(0x08) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVQ r64, xmm + if isReg64(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit(0x08) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVQ xmm, xmm + if isEVEXXMM(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x08) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit(0x08) + 
m.emit(0xd6) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVQ m64, xmm + if isM64(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x6e) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x7e) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VMOVQ xmm, m64 + if isEVEXXMM(v0) && isM64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x7e) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0xd6) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVQ") + } + return p +} + +// VMOVSD performs "Move Scalar Double-Precision Floating-Point Value". +// +// Mnemonic : VMOVSD +// Supported forms : (6 forms) +// +// * VMOVSD m64, xmm [AVX] +// * VMOVSD xmm, m64 [AVX] +// * VMOVSD xmm, xmm, xmm [AVX] +// * VMOVSD xmm, m64{k} [AVX512F] +// * VMOVSD m64, xmm{k}{z} [AVX512F] +// * VMOVSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VMOVSD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMOVSD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VMOVSD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VMOVSD takes 2 or 3 operands") + } + // VMOVSD m64, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), addr(v[0]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVSD xmm, m64 + if len(vv) == 0 && isXMM(v0) && isM64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[0]), addr(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVSD xmm, xmm, xmm + if len(vv) == 1 && isXMM(v0) && isXMM(v1) && isXMM(vv[0]) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[0]), v[2], hlcode(v[1])) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[2])) + }) + } + // VMOVSD xmm, m64{k} + if len(vv) == 0 && isEVEXXMM(v0) && isM64k(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VMOVSD m64, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VMOVSD xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[2])) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVSD") + } + return p +} + +// VMOVSHDUP performs "Move Packed Single-FP High and Duplicate". +// +// Mnemonic : VMOVSHDUP +// Supported forms : (10 forms) +// +// * VMOVSHDUP xmm, xmm [AVX] +// * VMOVSHDUP m128, xmm [AVX] +// * VMOVSHDUP ymm, ymm [AVX] +// * VMOVSHDUP m256, ymm [AVX] +// * VMOVSHDUP zmm, zmm{k}{z} [AVX512F] +// * VMOVSHDUP m512, zmm{k}{z} [AVX512F] +// * VMOVSHDUP xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVSHDUP ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVSHDUP m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVSHDUP m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVSHDUP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVSHDUP", 2, Operands { v0, v1 }) + // VMOVSHDUP xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), v[0], 0) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSHDUP m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVSHDUP ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), v[0], 0) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSHDUP m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), addr(v[0]), 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVSHDUP zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSHDUP m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVSHDUP xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSHDUP ymm, ymm{k}{z} + if 
isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSHDUP m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVSHDUP m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVSHDUP") + } + return p +} + +// VMOVSLDUP performs "Move Packed Single-FP Low and Duplicate". +// +// Mnemonic : VMOVSLDUP +// Supported forms : (10 forms) +// +// * VMOVSLDUP xmm, xmm [AVX] +// * VMOVSLDUP m128, xmm [AVX] +// * VMOVSLDUP ymm, ymm [AVX] +// * VMOVSLDUP m256, ymm [AVX] +// * VMOVSLDUP zmm, zmm{k}{z} [AVX512F] +// * VMOVSLDUP m512, zmm{k}{z} [AVX512F] +// * VMOVSLDUP xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVSLDUP ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVSLDUP m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVSLDUP m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVSLDUP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVSLDUP", 2, Operands { v0, v1 }) + // VMOVSLDUP xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), v[0], 0) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSLDUP m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVSLDUP ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), v[0], 0) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSLDUP m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), addr(v[0]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVSLDUP zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSLDUP m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVSLDUP xmm, xmm{k}{z} + if 
isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSLDUP ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSLDUP m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVSLDUP m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVSLDUP") + } + return p +} + +// VMOVSS performs "Move Scalar Single-Precision Floating-Point Values". +// +// Mnemonic : VMOVSS +// Supported forms : (6 forms) +// +// * VMOVSS m32, xmm [AVX] +// * VMOVSS xmm, m32 [AVX] +// * VMOVSS xmm, xmm, xmm [AVX] +// * VMOVSS xmm, m32{k} [AVX512F] +// * VMOVSS m32, xmm{k}{z} [AVX512F] +// * VMOVSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VMOVSS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMOVSS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VMOVSS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VMOVSS takes 2 or 3 operands") + } + // VMOVSS m32, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVSS xmm, m32 + if len(vv) == 0 && isXMM(v0) && isM32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[0]), addr(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVSS xmm, xmm, xmm + if len(vv) == 1 && isXMM(v0) && isXMM(v1) && isXMM(vv[0]) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[0]), v[2], hlcode(v[1])) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[2])) + }) + } + // VMOVSS xmm, m32{k} + if len(vv) == 0 && isEVEXXMM(v0) && isM32k(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VMOVSS m32, xmm{k}{z} + if len(vv) == 0 && 
isM32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VMOVSS xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[2])) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVSS") + } + return p +} + +// VMOVUPD performs "Move Unaligned Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VMOVUPD +// Supported forms : (15 forms) +// +// * VMOVUPD xmm, xmm [AVX] +// * VMOVUPD m128, xmm [AVX] +// * VMOVUPD ymm, ymm [AVX] +// * VMOVUPD m256, ymm [AVX] +// * VMOVUPD xmm, m128 [AVX] +// * VMOVUPD ymm, m256 [AVX] +// * VMOVUPD zmm, m512{k}{z} [AVX512F] +// * VMOVUPD zmm, zmm{k}{z} [AVX512F] +// * VMOVUPD m512, zmm{k}{z} [AVX512F] +// * VMOVUPD xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VMOVUPD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVUPD ymm, m256{k}{z} [AVX512F,AVX512VL] +// * VMOVUPD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVUPD m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVUPD m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVUPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVUPD", 2, Operands { v0, v1 }) + // VMOVUPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), v[1], 0) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVUPD ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[0]), v[1], 0) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPD m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), addr(v[0]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVUPD xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + 
m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVUPD ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[0]), addr(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVUPD zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVUPD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPD m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVUPD xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVUPD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPD ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVUPD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) 
| 0x28) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPD m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVUPD m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVUPD") + } + return p +} + +// VMOVUPS performs "Move Unaligned Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VMOVUPS +// Supported forms : (15 forms) +// +// * VMOVUPS xmm, xmm [AVX] +// * VMOVUPS m128, xmm [AVX] +// * VMOVUPS ymm, ymm [AVX] +// * VMOVUPS m256, ymm [AVX] +// * VMOVUPS xmm, m128 [AVX] +// * VMOVUPS ymm, m256 [AVX] +// * VMOVUPS zmm, m512{k}{z} [AVX512F] +// * VMOVUPS zmm, zmm{k}{z} [AVX512F] +// * VMOVUPS m512, zmm{k}{z} [AVX512F] +// * VMOVUPS xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VMOVUPS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVUPS ymm, m256{k}{z} [AVX512F,AVX512VL] +// * VMOVUPS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVUPS m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVUPS m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVUPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVUPS", 2, Operands { v0, v1 }) + // VMOVUPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[0]), v[1], 0) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), addr(v[0]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVUPS ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), v[0], 0) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[0]), v[1], 0) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPS m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), addr(v[0]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVUPS xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[0]), addr(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVUPS ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[0]), addr(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVUPS zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { 
+ self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVUPS zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPS m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVUPS xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVUPS xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPS ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVUPS ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPS m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + 
m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVUPS m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVUPS") + } + return p +} + +// VMPSADBW performs "Compute Multiple Packed Sums of Absolute Difference". +// +// Mnemonic : VMPSADBW +// Supported forms : (4 forms) +// +// * VMPSADBW imm8, xmm, xmm, xmm [AVX] +// * VMPSADBW imm8, m128, xmm, xmm [AVX] +// * VMPSADBW imm8, ymm, ymm, ymm [AVX2] +// * VMPSADBW imm8, m256, ymm, ymm [AVX2] +// +func (self *Program) VMPSADBW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VMPSADBW", 4, Operands { v0, v1, v2, v3 }) + // VMPSADBW imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x42) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VMPSADBW imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x42) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VMPSADBW imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x42) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VMPSADBW imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x42) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMPSADBW") + } + return p +} + +// VMULPD performs "Multiply Packed Double-Precision Floating-Point Values". 
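+//
+// Hedged usage sketch (not part of the generated encoder). Assuming a
+// *Program value p and this package's exported register values (YMM0 and
+// friends are an assumption here, not shown in this file), the destination
+// is the last operand, so YMM2 = YMM1 * YMM0 would be requested as:
+//
+//     p.VMULPD(YMM0, YMM1, YMM2)    // VMULPD ymm, ymm, ymm [AVX]
+//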
+// +// Mnemonic : VMULPD +// Supported forms : (11 forms) +// +// * VMULPD xmm, xmm, xmm [AVX] +// * VMULPD m128, xmm, xmm [AVX] +// * VMULPD ymm, ymm, ymm [AVX] +// * VMULPD m256, ymm, ymm [AVX] +// * VMULPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VMULPD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VMULPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VMULPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMULPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMULPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMULPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMULPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMULPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMULPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMULPD takes 3 or 4 operands") + } + // VMULPD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULPD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMULPD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULPD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMULPD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VMULPD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x59) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMULPD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULPD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && 
isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VMULPD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULPD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VMULPD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMULPD") + } + return p +} + +// VMULPS performs "Multiply Packed Single-Precision Floating-Point Values". 
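+//
+// Hedged sketch: the zmm forms take an optionally masked destination, so
+// assuming a plain ZMM register value is accepted where zmm{k}{z} is
+// listed, an unmasked 512-bit multiply would be:
+//
+//     p.VMULPS(ZMM0, ZMM1, ZMM2)    // VMULPS zmm, zmm, zmm{k}{z} [AVX512F]
+//
+// Matching this form also calls self.require(ISA_AVX512F), so the program
+// records the ISA extensions it depends on as instructions are added.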
+// +// Mnemonic : VMULPS +// Supported forms : (11 forms) +// +// * VMULPS xmm, xmm, xmm [AVX] +// * VMULPS m128, xmm, xmm [AVX] +// * VMULPS ymm, ymm, ymm [AVX] +// * VMULPS m256, ymm, ymm [AVX] +// * VMULPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VMULPS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VMULPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VMULPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMULPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMULPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMULPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMULPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMULPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMULPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMULPS takes 3 or 4 operands") + } + // VMULPS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULPS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMULPS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULPS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMULPS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VMULPS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x59) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMULPS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULPS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && 
isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VMULPS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULPS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VMULPS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMULPS") + } + return p +} + +// VMULSD performs "Multiply Scalar Double-Precision Floating-Point Values". 
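+//
+// Hedged sketch (XMM register names assumed): the scalar form multiplies
+// only the low double; per the AVX semantics the upper bits of the
+// destination come from the middle operand. XMM2 = XMM1 * XMM0:
+//
+//     p.VMULSD(XMM0, XMM1, XMM2)    // VMULSD xmm, xmm, xmm [AVX]
+//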
+// +// Mnemonic : VMULSD +// Supported forms : (5 forms) +// +// * VMULSD xmm, xmm, xmm [AVX] +// * VMULSD m64, xmm, xmm [AVX] +// * VMULSD m64, xmm, xmm{k}{z} [AVX512F] +// * VMULSD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VMULSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VMULSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMULSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMULSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMULSD takes 3 or 4 operands") + } + // VMULSD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULSD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMULSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VMULSD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x59) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMULSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMULSD") + } + return p +} + +// VMULSS performs "Multiply Scalar Single-Precision Floating-Point Values". 
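+//
+// As with the other arithmetic helpers in this file, operand checks are
+// exhaustive: if no form below matches, the builder panics with
+// "invalid operands for VMULSS" rather than emitting a bad encoding. A
+// hedged example (XMM names assumed), computing XMM2 = XMM1 * XMM0:
+//
+//     p.VMULSS(XMM0, XMM1, XMM2)    // VMULSS xmm, xmm, xmm [AVX]
+//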
+// +// Mnemonic : VMULSS +// Supported forms : (5 forms) +// +// * VMULSS xmm, xmm, xmm [AVX] +// * VMULSS m32, xmm, xmm [AVX] +// * VMULSS m32, xmm, xmm{k}{z} [AVX512F] +// * VMULSS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VMULSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VMULSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMULSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMULSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMULSS takes 3 or 4 operands") + } + // VMULSS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULSS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMULSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VMULSS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x59) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMULSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMULSS") + } + return p +} + +// VORPD performs "Bitwise Logical OR of Double-Precision Floating-Point Values". 
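+//
+// Hedged sketch (register names assumed): like the multiplies above, the
+// destination comes last, so a bitwise XMM2 = XMM1 | XMM0 is:
+//
+//     p.VORPD(XMM0, XMM1, XMM2)    // VORPD xmm, xmm, xmm [AVX]
+//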
+// +// Mnemonic : VORPD +// Supported forms : (10 forms) +// +// * VORPD xmm, xmm, xmm [AVX] +// * VORPD m128, xmm, xmm [AVX] +// * VORPD ymm, ymm, ymm [AVX] +// * VORPD m256, ymm, ymm [AVX] +// * VORPD m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VORPD zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VORPD m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VORPD xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VORPD m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VORPD ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VORPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VORPD", 3, Operands { v0, v1, v2 }) + // VORPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VORPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VORPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VORPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VORPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VORPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VORPD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VORPD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VORPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && 
isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VORPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VORPD") + } + return p +} + +// VORPS performs "Bitwise Logical OR of Single-Precision Floating-Point Values". +// +// Mnemonic : VORPS +// Supported forms : (10 forms) +// +// * VORPS xmm, xmm, xmm [AVX] +// * VORPS m128, xmm, xmm [AVX] +// * VORPS ymm, ymm, ymm [AVX] +// * VORPS m256, ymm, ymm [AVX] +// * VORPS m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VORPS zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VORPS m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VORPS xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VORPS m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VORPS ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VORPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VORPS", 3, Operands { v0, v1, v2 }) + // VORPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VORPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VORPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VORPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VORPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VORPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // 
VORPS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VORPS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VORPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VORPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VORPS") + } + return p +} + +// VPABSB performs "Packed Absolute Value of Byte Integers". 
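+//
+// Hedged sketch (register names assumed): this is a two-operand form, with
+// the source first and the destination last, e.g. XMM1 = abs(XMM0):
+//
+//     p.VPABSB(XMM0, XMM1)    // VPABSB xmm, xmm [AVX]
+//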
+// +// Mnemonic : VPABSB +// Supported forms : (10 forms) +// +// * VPABSB xmm, xmm [AVX] +// * VPABSB m128, xmm [AVX] +// * VPABSB ymm, ymm [AVX2] +// * VPABSB m256, ymm [AVX2] +// * VPABSB zmm, zmm{k}{z} [AVX512BW] +// * VPABSB m512, zmm{k}{z} [AVX512BW] +// * VPABSB xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPABSB ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPABSB m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPABSB m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPABSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPABSB", 2, Operands { v0, v1 }) + // VPABSB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x1c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x1c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPABSB ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x1c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSB m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x1c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPABSB zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x1c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSB m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1c) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPABSB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x1c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSB ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x1c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSB m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1c) + m.mrsd(lcode(v[1]), 
addr(v[0]), 16) + }) + } + // VPABSB m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1c) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPABSB") + } + return p +} + +// VPABSD performs "Packed Absolute Value of Doubleword Integers". +// +// Mnemonic : VPABSD +// Supported forms : (10 forms) +// +// * VPABSD xmm, xmm [AVX] +// * VPABSD m128, xmm [AVX] +// * VPABSD ymm, ymm [AVX2] +// * VPABSD m256, ymm [AVX2] +// * VPABSD m512/m32bcst, zmm{k}{z} [AVX512F] +// * VPABSD zmm, zmm{k}{z} [AVX512F] +// * VPABSD m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPABSD m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPABSD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPABSD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPABSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPABSD", 2, Operands { v0, v1 }) + // VPABSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x1e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPABSD ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSD m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x1e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPABSD m512/m32bcst, zmm{k}{z} + if isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x1e) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPABSD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSD m128/m32bcst, xmm{k}{z} + if isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x1e) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPABSD m256/m32bcst, ymm{k}{z} + if isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), 
addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x1e) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPABSD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPABSD") + } + return p +} + +// VPABSQ performs "Packed Absolute Value of Quadword Integers". +// +// Mnemonic : VPABSQ +// Supported forms : (6 forms) +// +// * VPABSQ m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPABSQ zmm, zmm{k}{z} [AVX512F] +// * VPABSQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPABSQ m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPABSQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPABSQ ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPABSQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPABSQ", 2, Operands { v0, v1 }) + // VPABSQ m512/m64bcst, zmm{k}{z} + if isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x1f) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPABSQ zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSQ m128/m64bcst, xmm{k}{z} + if isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x1f) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPABSQ m256/m64bcst, ymm{k}{z} + if isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x1f) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPABSQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSQ ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m 
*_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPABSQ") + } + return p +} + +// VPABSW performs "Packed Absolute Value of Word Integers". +// +// Mnemonic : VPABSW +// Supported forms : (10 forms) +// +// * VPABSW xmm, xmm [AVX] +// * VPABSW m128, xmm [AVX] +// * VPABSW ymm, ymm [AVX2] +// * VPABSW m256, ymm [AVX2] +// * VPABSW zmm, zmm{k}{z} [AVX512BW] +// * VPABSW m512, zmm{k}{z} [AVX512BW] +// * VPABSW xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPABSW ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPABSW m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPABSW m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPABSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPABSW", 2, Operands { v0, v1 }) + // VPABSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPABSW ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSW m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPABSW zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSW m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPABSW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSW ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + 
m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSW m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPABSW m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPABSW") + } + return p +} + +// VPACKSSDW performs "Pack Doublewords into Words with Signed Saturation". +// +// Mnemonic : VPACKSSDW +// Supported forms : (10 forms) +// +// * VPACKSSDW xmm, xmm, xmm [AVX] +// * VPACKSSDW m128, xmm, xmm [AVX] +// * VPACKSSDW ymm, ymm, ymm [AVX2] +// * VPACKSSDW m256, ymm, ymm [AVX2] +// * VPACKSSDW m512/m32bcst, zmm, zmm{k}{z} [AVX512BW] +// * VPACKSSDW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPACKSSDW m128/m32bcst, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPACKSSDW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPACKSSDW m256/m32bcst, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPACKSSDW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPACKSSDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPACKSSDW", 3, Operands { v0, v1, v2 }) + // VPACKSSDW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSDW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x6b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPACKSSDW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSDW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x6b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPACKSSDW m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6b) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPACKSSDW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x6b) + 
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSDW m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6b) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPACKSSDW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSDW m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6b) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPACKSSDW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPACKSSDW") + } + return p +} + +// VPACKSSWB performs "Pack Words into Bytes with Signed Saturation". 
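+//
+// Hedged sketch (register names assumed): packs the signed words of the
+// two sources into saturated signed bytes in the destination, which again
+// comes last:
+//
+//     p.VPACKSSWB(XMM0, XMM1, XMM2)    // VPACKSSWB xmm, xmm, xmm [AVX]
+//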
+// +// Mnemonic : VPACKSSWB +// Supported forms : (10 forms) +// +// * VPACKSSWB xmm, xmm, xmm [AVX] +// * VPACKSSWB m128, xmm, xmm [AVX] +// * VPACKSSWB ymm, ymm, ymm [AVX2] +// * VPACKSSWB m256, ymm, ymm [AVX2] +// * VPACKSSWB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPACKSSWB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPACKSSWB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPACKSSWB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPACKSSWB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPACKSSWB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPACKSSWB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPACKSSWB", 3, Operands { v0, v1, v2 }) + // VPACKSSWB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x63) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSWB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x63) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPACKSSWB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x63) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSWB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x63) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPACKSSWB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x63) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSWB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x63) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPACKSSWB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x63) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSWB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x63) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPACKSSWB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) 
&& isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x63) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSWB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x63) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPACKSSWB") + } + return p +} + +// VPACKUSDW performs "Pack Doublewords into Words with Unsigned Saturation". +// +// Mnemonic : VPACKUSDW +// Supported forms : (10 forms) +// +// * VPACKUSDW xmm, xmm, xmm [AVX] +// * VPACKUSDW m128, xmm, xmm [AVX] +// * VPACKUSDW ymm, ymm, ymm [AVX2] +// * VPACKUSDW m256, ymm, ymm [AVX2] +// * VPACKUSDW m512/m32bcst, zmm, zmm{k}{z} [AVX512BW] +// * VPACKUSDW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPACKUSDW m128/m32bcst, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPACKUSDW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPACKUSDW m256/m32bcst, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPACKUSDW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPACKUSDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPACKUSDW", 3, Operands { v0, v1, v2 }) + // VPACKUSDW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSDW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPACKUSDW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSDW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPACKUSDW m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2b) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPACKUSDW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | 
(ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSDW m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2b) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPACKUSDW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSDW m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2b) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPACKUSDW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPACKUSDW") + } + return p +} + +// VPACKUSWB performs "Pack Words into Bytes with Unsigned Saturation". 
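+//
+// Editorial note: as with the other three-operand forms in this file, the
+// destination is the last argument; in the encoders below v[2] lands in the
+// ModRM.reg field, v[1] in VEX/EVEX.vvvv, and v[0] in ModRM.rm (or memory).
+// A minimal sketch, assuming this package's XMM register values:
+//
+//     p.VPACKUSWB(XMM0, XMM1, XMM2)    // xmm2 <- u8-saturated pack of xmm1 (low bytes) and xmm0 (high bytes)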
+// +// Mnemonic : VPACKUSWB +// Supported forms : (10 forms) +// +// * VPACKUSWB xmm, xmm, xmm [AVX] +// * VPACKUSWB m128, xmm, xmm [AVX] +// * VPACKUSWB ymm, ymm, ymm [AVX2] +// * VPACKUSWB m256, ymm, ymm [AVX2] +// * VPACKUSWB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPACKUSWB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPACKUSWB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPACKUSWB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPACKUSWB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPACKUSWB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPACKUSWB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPACKUSWB", 3, Operands { v0, v1, v2 }) + // VPACKUSWB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x67) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSWB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x67) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPACKUSWB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x67) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSWB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x67) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPACKUSWB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x67) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSWB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x67) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPACKUSWB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x67) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSWB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x67) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPACKUSWB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) 
&& isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x67) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSWB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x67) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPACKUSWB") + } + return p +} + +// VPADDB performs "Add Packed Byte Integers". +// +// Mnemonic : VPADDB +// Supported forms : (10 forms) +// +// * VPADDB xmm, xmm, xmm [AVX] +// * VPADDB m128, xmm, xmm [AVX] +// * VPADDB ymm, ymm, ymm [AVX2] +// * VPADDB m256, ymm, ymm [AVX2] +// * VPADDB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPADDB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPADDB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPADDB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPADDB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPADDB", 3, Operands { v0, v1, v2 }) + // VPADDB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xfc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xfc) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPADDB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && 
isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xfc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xfc) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPADDB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xfc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xfc) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPADDB") + } + return p +} + +// VPADDD performs "Add Packed Doubleword Integers". +// +// Mnemonic : VPADDD +// Supported forms : (10 forms) +// +// * VPADDD xmm, xmm, xmm [AVX] +// * VPADDD m128, xmm, xmm [AVX] +// * VPADDD ymm, ymm, ymm [AVX2] +// * VPADDD m256, ymm, ymm [AVX2] +// * VPADDD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPADDD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPADDD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPADDD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPADDD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPADDD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPADDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPADDD", 3, Operands { v0, v1, v2 }) + // VPADDD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfe) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfe) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDD 
m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xfe) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPADDD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xfe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xfe) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPADDD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xfe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xfe) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPADDD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xfe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPADDD") + } + return p +} + +// VPADDQ performs "Add Packed Quadword Integers". 
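+//
+// Editorial note: the m64bcst memory forms set EVEX.b (bcode(v[0]) in the
+// encoders below) to broadcast one 64-bit element across all lanes. A
+// register-form sketch, assuming this package's ZMM register values:
+//
+//     p.VPADDQ(ZMM0, ZMM1, ZMM2)    // zmm2 <- zmm1 + zmm0, eight 64-bit lanes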
+// +// Mnemonic : VPADDQ +// Supported forms : (10 forms) +// +// * VPADDQ xmm, xmm, xmm [AVX] +// * VPADDQ m128, xmm, xmm [AVX] +// * VPADDQ ymm, ymm, ymm [AVX2] +// * VPADDQ m256, ymm, ymm [AVX2] +// * VPADDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPADDQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPADDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPADDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPADDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPADDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPADDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPADDQ", 3, Operands { v0, v1, v2 }) + // VPADDQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd4) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd4) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xd4) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPADDQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xd4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xd4) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPADDQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xd4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && 
isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xd4) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPADDQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xd4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPADDQ") + } + return p +} + +// VPADDSB performs "Add Packed Signed Byte Integers with Signed Saturation". +// +// Mnemonic : VPADDSB +// Supported forms : (10 forms) +// +// * VPADDSB xmm, xmm, xmm [AVX] +// * VPADDSB m128, xmm, xmm [AVX] +// * VPADDSB ymm, ymm, ymm [AVX2] +// * VPADDSB m256, ymm, ymm [AVX2] +// * VPADDSB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPADDSB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPADDSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPADDSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPADDSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPADDSB", 3, Operands { v0, v1, v2 }) + // VPADDSB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xec) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xec) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDSB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xec) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xec) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDSB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xec) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xec) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + 
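+ // Editorial note: the remaining forms are the 128/256-bit AVX512VL variants.
+ // In the hand-rolled EVEX prefixes below, the last prefix byte selects the
+ // vector length in bits 6:5 (0x00 = 128-bit, 0x20 = 256-bit, 0x40 = 512-bit)
+ // alongside the zeroing flag (zcode << 7) and the mask register (kcode).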
// VPADDSB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xec) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xec) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPADDSB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xec) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xec) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPADDSB") + } + return p +} + +// VPADDSW performs "Add Packed Signed Word Integers with Signed Saturation". 
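+//
+// Saturation example (editorial): each 16-bit lane clamps to [-32768, 32767],
+// so 0x7FFF + 0x0001 stays 0x7FFF instead of wrapping to 0x8000.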
+// +// Mnemonic : VPADDSW +// Supported forms : (10 forms) +// +// * VPADDSW xmm, xmm, xmm [AVX] +// * VPADDSW m128, xmm, xmm [AVX] +// * VPADDSW ymm, ymm, ymm [AVX2] +// * VPADDSW m256, ymm, ymm [AVX2] +// * VPADDSW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPADDSW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPADDSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPADDSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPADDSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPADDSW", 3, Operands { v0, v1, v2 }) + // VPADDSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xed) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xed) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xed) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xed) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDSW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xed) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xed) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPADDSW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xed) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xed) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPADDSW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL 
| ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xed) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xed) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPADDSW") + } + return p +} + +// VPADDUSB performs "Add Packed Unsigned Byte Integers with Unsigned Saturation". +// +// Mnemonic : VPADDUSB +// Supported forms : (10 forms) +// +// * VPADDUSB xmm, xmm, xmm [AVX] +// * VPADDUSB m128, xmm, xmm [AVX] +// * VPADDUSB ymm, ymm, ymm [AVX2] +// * VPADDUSB m256, ymm, ymm [AVX2] +// * VPADDUSB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPADDUSB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPADDUSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDUSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDUSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPADDUSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPADDUSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPADDUSB", 3, Operands { v0, v1, v2 }) + // VPADDUSB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDUSB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDUSB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xdc) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPADDUSB xmm, xmm, xmm{k}{z} + if 
isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xdc) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPADDUSB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xdc) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPADDUSB") + } + return p +} + +// VPADDUSW performs "Add Packed Unsigned Word Integers with Unsigned Saturation". 
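+//
+// Saturation example (editorial): each 16-bit lane clamps to [0, 65535],
+// so 0xFFFF + 0x0001 stays 0xFFFF instead of wrapping to 0x0000.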
+// +// Mnemonic : VPADDUSW +// Supported forms : (10 forms) +// +// * VPADDUSW xmm, xmm, xmm [AVX] +// * VPADDUSW m128, xmm, xmm [AVX] +// * VPADDUSW ymm, ymm, ymm [AVX2] +// * VPADDUSW m256, ymm, ymm [AVX2] +// * VPADDUSW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPADDUSW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPADDUSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDUSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDUSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPADDUSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPADDUSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPADDUSW", 3, Operands { v0, v1, v2 }) + // VPADDUSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdd) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDUSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdd) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDUSW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xdd) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPADDUSW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xdd) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPADDUSW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + 
self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xdd) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPADDUSW") + } + return p +} + +// VPADDW performs "Add Packed Word Integers". +// +// Mnemonic : VPADDW +// Supported forms : (10 forms) +// +// * VPADDW xmm, xmm, xmm [AVX] +// * VPADDW m128, xmm, xmm [AVX] +// * VPADDW ymm, ymm, ymm [AVX2] +// * VPADDW m256, ymm, ymm [AVX2] +// * VPADDW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPADDW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPADDW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPADDW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPADDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPADDW", 3, Operands { v0, v1, v2 }) + // VPADDW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfd) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfd) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xfd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xfd) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPADDW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + 
self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xfd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xfd) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPADDW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xfd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xfd) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPADDW") + } + return p +} + +// VPALIGNR performs "Packed Align Right". +// +// Mnemonic : VPALIGNR +// Supported forms : (10 forms) +// +// * VPALIGNR imm8, xmm, xmm, xmm [AVX] +// * VPALIGNR imm8, m128, xmm, xmm [AVX] +// * VPALIGNR imm8, ymm, ymm, ymm [AVX2] +// * VPALIGNR imm8, m256, ymm, ymm [AVX2] +// * VPALIGNR imm8, zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPALIGNR imm8, m512, zmm, zmm{k}{z} [AVX512BW] +// * VPALIGNR imm8, xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPALIGNR imm8, m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPALIGNR imm8, ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPALIGNR imm8, m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPALIGNR(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPALIGNR", 4, Operands { v0, v1, v2, v3 }) + // VPALIGNR imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0f) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[3]) << 
3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0f) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, m512, zmm, zmm{k}{z} + if isImm8(v0) && isM512(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x0f) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, m128, xmm, xmm{k}{z} + if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x0f) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, m256, ymm, ymm{k}{z} + if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x0f) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPALIGNR") + } + return p +} + +// VPAND performs "Packed Bitwise Logical AND". 
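+//
+// Editorial note: unlike most instructions in this file, VPAND has no EVEX
+// form; for 512-bit vectors or masked operation use the element-typed
+// VPANDD/VPANDQ variants defined below.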
+// +// Mnemonic : VPAND +// Supported forms : (4 forms) +// +// * VPAND xmm, xmm, xmm [AVX] +// * VPAND m128, xmm, xmm [AVX] +// * VPAND ymm, ymm, ymm [AVX2] +// * VPAND m256, ymm, ymm [AVX2] +// +func (self *Program) VPAND(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPAND", 3, Operands { v0, v1, v2 }) + // VPAND xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAND m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdb) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPAND ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAND m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdb) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPAND") + } + return p +} + +// VPANDD performs "Bitwise Logical AND of Packed Doubleword Integers". +// +// Mnemonic : VPANDD +// Supported forms : (6 forms) +// +// * VPANDD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPANDD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPANDD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPANDD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPANDD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPANDD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPANDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPANDD", 3, Operands { v0, v1, v2 }) + // VPANDD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdb) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPANDD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdb) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPANDD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + 
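+ // Editorial gloss on the four EVEX prefix bytes emitted below (standard
+ // EVEX layout): 0x62 is the escape byte; the next byte carries the inverted
+ // register-extension bits (R, X, B, R') plus the opcode map; 0x7d encodes
+ // W=0, the inverted vvvv register and the 0x66 mandatory prefix (pp=01);
+ // the final byte packs zeroing, vector length (0x00 = 128-bit here), V'
+ // and the mask register.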
m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdb) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPANDD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPANDD") + } + return p +} + +// VPANDN performs "Packed Bitwise Logical AND NOT". +// +// Mnemonic : VPANDN +// Supported forms : (4 forms) +// +// * VPANDN xmm, xmm, xmm [AVX] +// * VPANDN m128, xmm, xmm [AVX] +// * VPANDN ymm, ymm, ymm [AVX2] +// * VPANDN m256, ymm, ymm [AVX2] +// +func (self *Program) VPANDN(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPANDN", 3, Operands { v0, v1, v2 }) + // VPANDN xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDN m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPANDN ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDN m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPANDN") + } + return p +} + +// VPANDND performs "Bitwise Logical AND NOT of Packed Doubleword Integers". 
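+//
+// Semantics note (editorial): per the SDM, the first source (the vvvv
+// register, passed second here) is the one complemented:
+//
+//     p.VPANDND(ZMM0, ZMM1, ZMM2)    // zmm2 <- (NOT zmm1) AND zmm0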
+// +// Mnemonic : VPANDND +// Supported forms : (6 forms) +// +// * VPANDND m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPANDND zmm, zmm, zmm{k}{z} [AVX512F] +// * VPANDND m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPANDND xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPANDND m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPANDND ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPANDND(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPANDND", 3, Operands { v0, v1, v2 }) + // VPANDND m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPANDND zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDND m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPANDND xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDND m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPANDND ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPANDND") + } + return p +} + +// VPANDNQ performs "Bitwise Logical AND NOT of Packed Quadword Integers". 
+// +// Mnemonic : VPANDNQ +// Supported forms : (6 forms) +// +// * VPANDNQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPANDNQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPANDNQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPANDNQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPANDNQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPANDNQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPANDNQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPANDNQ", 3, Operands { v0, v1, v2 }) + // VPANDNQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPANDNQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDNQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPANDNQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDNQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPANDNQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPANDNQ") + } + return p +} + +// VPANDQ performs "Bitwise Logical AND of Packed Quadword Integers". 
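+//
+// Illustrative call (editorial sketch; ZMM constants assumed). The
+// m64bcst forms take a memory operand as the first argument instead,
+// optionally flagged for 64-bit broadcast.
+//
+//     p.VPANDQ(ZMM2, ZMM1, ZMM0)   // zmm0 = zmm1 AND zmm2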
+// +// Mnemonic : VPANDQ +// Supported forms : (6 forms) +// +// * VPANDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPANDQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPANDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPANDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPANDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPANDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPANDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPANDQ", 3, Operands { v0, v1, v2 }) + // VPANDQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdb) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPANDQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdb) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPANDQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdb) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPANDQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPANDQ") + } + return p +} + +// VPAVGB performs "Average Packed Byte Integers". 
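+//
+// Illustrative call for the plain AVX form (editorial sketch; XMM
+// constants assumed):
+//
+//     p.VPAVGB(XMM2, XMM1, XMM0)   // xmm0 = rounded per-byte average of xmm1 and xmm2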
+// +// Mnemonic : VPAVGB +// Supported forms : (10 forms) +// +// * VPAVGB xmm, xmm, xmm [AVX] +// * VPAVGB m128, xmm, xmm [AVX] +// * VPAVGB ymm, ymm, ymm [AVX2] +// * VPAVGB m256, ymm, ymm [AVX2] +// * VPAVGB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPAVGB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPAVGB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPAVGB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPAVGB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPAVGB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPAVGB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPAVGB", 3, Operands { v0, v1, v2 }) + // VPAVGB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe0) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPAVGB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe0) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPAVGB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe0) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPAVGB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe0) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPAVGB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + 
p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe0) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPAVGB") + } + return p +} + +// VPAVGW performs "Average Packed Word Integers". +// +// Mnemonic : VPAVGW +// Supported forms : (10 forms) +// +// * VPAVGW xmm, xmm, xmm [AVX] +// * VPAVGW m128, xmm, xmm [AVX] +// * VPAVGW ymm, ymm, ymm [AVX2] +// * VPAVGW m256, ymm, ymm [AVX2] +// * VPAVGW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPAVGW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPAVGW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPAVGW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPAVGW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPAVGW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPAVGW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPAVGW", 3, Operands { v0, v1, v2 }) + // VPAVGW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe3) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPAVGW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe3) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPAVGW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe3) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPAVGW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + 
p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe3) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPAVGW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe3) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPAVGW") + } + return p +} + +// VPBLENDD performs "Blend Packed Doublewords". +// +// Mnemonic : VPBLENDD +// Supported forms : (4 forms) +// +// * VPBLENDD imm8, xmm, xmm, xmm [AVX2] +// * VPBLENDD imm8, m128, xmm, xmm [AVX2] +// * VPBLENDD imm8, ymm, ymm, ymm [AVX2] +// * VPBLENDD imm8, m256, ymm, ymm [AVX2] +// +func (self *Program) VPBLENDD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPBLENDD", 4, Operands { v0, v1, v2, v3 }) + // VPBLENDD imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x02) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPBLENDD imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x02) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPBLENDD imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x02) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPBLENDD imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x02) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + 
}) + } + if p.len == 0 { + panic("invalid operands for VPBLENDD") + } + return p +} + +// VPBLENDMB performs "Blend Byte Vectors Using an OpMask Control". +// +// Mnemonic : VPBLENDMB +// Supported forms : (6 forms) +// +// * VPBLENDMB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPBLENDMB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPBLENDMB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBLENDMB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBLENDMB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPBLENDMB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPBLENDMB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPBLENDMB", 3, Operands { v0, v1, v2 }) + // VPBLENDMB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPBLENDMB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPBLENDMB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPBLENDMB") + } + return p +} + +// VPBLENDMD performs "Blend Doubleword Vectors Using an OpMask Control". 
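+//
+// Illustrative call (editorial sketch). The third operand may carry an
+// opmask ({k}{z}) using this package's masked-operand syntax, which is
+// defined elsewhere and omitted here; set mask bits select elements from
+// the first source, clear bits from the second.
+//
+//     p.VPBLENDMD(ZMM2, ZMM1, ZMM0)   // per-dword blend of zmm1/zmm2 into zmm0 under the opmask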
+// +// Mnemonic : VPBLENDMD +// Supported forms : (6 forms) +// +// * VPBLENDMD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPBLENDMD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPBLENDMD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBLENDMD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBLENDMD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPBLENDMD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPBLENDMD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPBLENDMD", 3, Operands { v0, v1, v2 }) + // VPBLENDMD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPBLENDMD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPBLENDMD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPBLENDMD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPBLENDMD") + } + return p +} + +// VPBLENDMQ performs "Blend Quadword Vectors Using an OpMask Control". 
+// +// Mnemonic : VPBLENDMQ +// Supported forms : (6 forms) +// +// * VPBLENDMQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPBLENDMQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPBLENDMQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBLENDMQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBLENDMQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPBLENDMQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPBLENDMQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPBLENDMQ", 3, Operands { v0, v1, v2 }) + // VPBLENDMQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPBLENDMQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPBLENDMQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPBLENDMQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPBLENDMQ") + } + return p +} + +// VPBLENDMW performs "Blend Word Vectors Using an OpMask Control". 
+// +// Mnemonic : VPBLENDMW +// Supported forms : (6 forms) +// +// * VPBLENDMW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPBLENDMW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPBLENDMW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBLENDMW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBLENDMW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPBLENDMW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPBLENDMW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPBLENDMW", 3, Operands { v0, v1, v2 }) + // VPBLENDMW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPBLENDMW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPBLENDMW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPBLENDMW") + } + return p +} + +// VPBLENDVB performs "Variable Blend Packed Bytes". 
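+//
+// Illustrative call (editorial sketch; XMM constants assumed): the first
+// operand is the byte selector, and the high bit of each selector byte
+// picks the corresponding byte of the second operand, else the third's.
+//
+//     p.VPBLENDVB(XMM3, XMM2, XMM1, XMM0)   // xmm0[i] = msb(xmm3[i]) ? xmm2[i] : xmm1[i]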
+// +// Mnemonic : VPBLENDVB +// Supported forms : (4 forms) +// +// * VPBLENDVB xmm, xmm, xmm, xmm [AVX] +// * VPBLENDVB xmm, m128, xmm, xmm [AVX] +// * VPBLENDVB ymm, ymm, ymm, ymm [AVX2] +// * VPBLENDVB ymm, m256, ymm, ymm [AVX2] +// +func (self *Program) VPBLENDVB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPBLENDVB", 4, Operands { v0, v1, v2, v3 }) + // VPBLENDVB xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPBLENDVB xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x4c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPBLENDVB ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPBLENDVB ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x4c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPBLENDVB") + } + return p +} + +// VPBLENDW performs "Blend Packed Words". 
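+//
+// Illustrative call (editorial sketch; the imm8 is assumed to be accepted
+// as a plain Go integer, per toImmAny in the encoder below). A set bit i
+// selects word i from the second operand, a clear bit from the third.
+//
+//     p.VPBLENDW(0b10101010, XMM2, XMM1, XMM0)   // odd words from xmm2, even words from xmm1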
+// +// Mnemonic : VPBLENDW +// Supported forms : (4 forms) +// +// * VPBLENDW imm8, xmm, xmm, xmm [AVX] +// * VPBLENDW imm8, m128, xmm, xmm [AVX] +// * VPBLENDW imm8, ymm, ymm, ymm [AVX2] +// * VPBLENDW imm8, m256, ymm, ymm [AVX2] +// +func (self *Program) VPBLENDW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPBLENDW", 4, Operands { v0, v1, v2, v3 }) + // VPBLENDW imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x0e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPBLENDW imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0e) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPBLENDW imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x0e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPBLENDW imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0e) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPBLENDW") + } + return p +} + +// VPBROADCASTB performs "Broadcast Byte Integer". 
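+//
+// Illustrative call (editorial sketch): the broadcast source comes first,
+// the destination last.
+//
+//     p.VPBROADCASTB(XMM1, YMM0)   // replicate the low byte of xmm1 into all 32 bytes of ymm0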
+// +// Mnemonic : VPBROADCASTB +// Supported forms : (13 forms) +// +// * VPBROADCASTB xmm, xmm [AVX2] +// * VPBROADCASTB m8, xmm [AVX2] +// * VPBROADCASTB xmm, ymm [AVX2] +// * VPBROADCASTB m8, ymm [AVX2] +// * VPBROADCASTB r32, zmm{k}{z} [AVX512BW] +// * VPBROADCASTB xmm, zmm{k}{z} [AVX512BW] +// * VPBROADCASTB m8, zmm{k}{z} [AVX512BW] +// * VPBROADCASTB r32, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTB r32, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTB xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTB xmm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTB m8, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTB m8, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPBROADCASTB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPBROADCASTB", 2, Operands { v0, v1 }) + // VPBROADCASTB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTB m8, xmm + if isM8(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTB xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTB m8, ymm + if isM8(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTB r32, zmm{k}{z} + if isReg32(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTB xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTB m8, zmm{k}{z} + if isM8(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTB r32, xmm{k}{z} + if isReg32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTB r32, ymm{k}{z} + if isReg32(v0) && 
isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTB xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTB m8, xmm{k}{z} + if isM8(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTB m8, ymm{k}{z} + if isM8(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPBROADCASTB") + } + return p +} + +// VPBROADCASTD performs "Broadcast Doubleword Integer". 
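+//
+// Illustrative call (editorial sketch; EAX assumed from this package's
+// 32-bit GPR constants). Note the r32 source forms are AVX-512 only.
+//
+//     p.VPBROADCASTD(EAX, ZMM0)   // replicate eax into all 16 dwords of zmm0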
+// +// Mnemonic : VPBROADCASTD +// Supported forms : (13 forms) +// +// * VPBROADCASTD xmm, xmm [AVX2] +// * VPBROADCASTD m32, xmm [AVX2] +// * VPBROADCASTD xmm, ymm [AVX2] +// * VPBROADCASTD m32, ymm [AVX2] +// * VPBROADCASTD r32, zmm{k}{z} [AVX512F] +// * VPBROADCASTD xmm, zmm{k}{z} [AVX512F] +// * VPBROADCASTD m32, zmm{k}{z} [AVX512F] +// * VPBROADCASTD r32, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTD r32, ymm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTD xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTD m32, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTD m32, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPBROADCASTD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPBROADCASTD", 2, Operands { v0, v1 }) + // VPBROADCASTD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTD xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTD m32, ymm + if isM32(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTD r32, zmm{k}{z} + if isReg32(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTD xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTD m32, zmm{k}{z} + if isM32(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPBROADCASTD r32, xmm{k}{z} + if isReg32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTD r32, ymm{k}{z} + if isReg32(v0) && 
isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTD xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTD m32, xmm{k}{z} + if isM32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPBROADCASTD m32, ymm{k}{z} + if isM32(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPBROADCASTD") + } + return p +} + +// VPBROADCASTMB2Q performs "Broadcast Low Byte of Mask Register to Packed Quadword Values". 
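+//
+// Illustrative call (editorial sketch; K1 assumed from this package's
+// opmask register constants):
+//
+//     p.VPBROADCASTMB2Q(K1, ZMM0)   // zero-extend the low byte of k1 into each qword of zmm0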
+// +// Mnemonic : VPBROADCASTMB2Q +// Supported forms : (3 forms) +// +// * VPBROADCASTMB2Q k, xmm [AVX512CD,AVX512VL] +// * VPBROADCASTMB2Q k, ymm [AVX512CD,AVX512VL] +// * VPBROADCASTMB2Q k, zmm [AVX512CD] +// +func (self *Program) VPBROADCASTMB2Q(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPBROADCASTMB2Q", 2, Operands { v0, v1 }) + // VPBROADCASTMB2Q k, xmm + if isK(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x08) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTMB2Q k, ymm + if isK(v0) && isEVEXYMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x28) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTMB2Q k, zmm + if isK(v0) && isZMM(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPBROADCASTMB2Q") + } + return p +} + +// VPBROADCASTMW2D performs "Broadcast Low Word of Mask Register to Packed Doubleword Values". +// +// Mnemonic : VPBROADCASTMW2D +// Supported forms : (3 forms) +// +// * VPBROADCASTMW2D k, xmm [AVX512CD,AVX512VL] +// * VPBROADCASTMW2D k, ymm [AVX512CD,AVX512VL] +// * VPBROADCASTMW2D k, zmm [AVX512CD] +// +func (self *Program) VPBROADCASTMW2D(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPBROADCASTMW2D", 2, Operands { v0, v1 }) + // VPBROADCASTMW2D k, xmm + if isK(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x08) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTMW2D k, ymm + if isK(v0) && isEVEXYMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x28) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTMW2D k, zmm + if isK(v0) && isZMM(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPBROADCASTMW2D") + } + return p +} + +// VPBROADCASTQ performs "Broadcast Quadword Integer". 
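+//
+// Illustrative call for the AVX2 register form (editorial sketch):
+//
+//     p.VPBROADCASTQ(XMM1, YMM0)   // replicate the low qword of xmm1 into all 4 qwords of ymm0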
+// +// Mnemonic : VPBROADCASTQ +// Supported forms : (13 forms) +// +// * VPBROADCASTQ xmm, xmm [AVX2] +// * VPBROADCASTQ m64, xmm [AVX2] +// * VPBROADCASTQ xmm, ymm [AVX2] +// * VPBROADCASTQ m64, ymm [AVX2] +// * VPBROADCASTQ r64, zmm{k}{z} [AVX512F] +// * VPBROADCASTQ xmm, zmm{k}{z} [AVX512F] +// * VPBROADCASTQ m64, zmm{k}{z} [AVX512F] +// * VPBROADCASTQ r64, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTQ r64, ymm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTQ xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTQ m64, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTQ m64, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPBROADCASTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPBROADCASTQ", 2, Operands { v0, v1 }) + // VPBROADCASTQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTQ m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTQ xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTQ m64, ymm + if isM64(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTQ r64, zmm{k}{z} + if isReg64(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTQ xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTQ m64, zmm{k}{z} + if isM64(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPBROADCASTQ r64, xmm{k}{z} + if isReg64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTQ r64, ymm{k}{z} + if isReg64(v0) && 
isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTQ xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTQ m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPBROADCASTQ m64, ymm{k}{z} + if isM64(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPBROADCASTQ") + } + return p +} + +// VPBROADCASTW performs "Broadcast Word Integer". 
+// +// Mnemonic : VPBROADCASTW +// Supported forms : (13 forms) +// +// * VPBROADCASTW xmm, xmm [AVX2] +// * VPBROADCASTW m16, xmm [AVX2] +// * VPBROADCASTW xmm, ymm [AVX2] +// * VPBROADCASTW m16, ymm [AVX2] +// * VPBROADCASTW r32, zmm{k}{z} [AVX512BW] +// * VPBROADCASTW xmm, zmm{k}{z} [AVX512BW] +// * VPBROADCASTW m16, zmm{k}{z} [AVX512BW] +// * VPBROADCASTW r32, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTW r32, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTW xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTW xmm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTW m16, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTW m16, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPBROADCASTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPBROADCASTW", 2, Operands { v0, v1 }) + // VPBROADCASTW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTW m16, xmm + if isM16(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTW xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTW m16, ymm + if isM16(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTW r32, zmm{k}{z} + if isReg32(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTW xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTW m16, zmm{k}{z} + if isM16(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 2) + }) + } + // VPBROADCASTW r32, xmm{k}{z} + if isReg32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTW r32, ymm{k}{z} + if 
isReg32(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTW xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTW m16, xmm{k}{z} + if isM16(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 2) + }) + } + // VPBROADCASTW m16, ymm{k}{z} + if isM16(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 2) + }) + } + if p.len == 0 { + panic("invalid operands for VPBROADCASTW") + } + return p +} + +// VPCLMULQDQ performs "Carry-Less Quadword Multiplication". +// +// Mnemonic : VPCLMULQDQ +// Supported forms : (2 forms) +// +// * VPCLMULQDQ imm8, xmm, xmm, xmm [AVX,PCLMULQDQ] +// * VPCLMULQDQ imm8, m128, xmm, xmm [AVX,PCLMULQDQ] +// +func (self *Program) VPCLMULQDQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCLMULQDQ", 4, Operands { v0, v1, v2, v3 }) + // VPCLMULQDQ imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX | ISA_PCLMULQDQ) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x44) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCLMULQDQ imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX | ISA_PCLMULQDQ) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x44) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCLMULQDQ") + } + return p +} + +// VPCMOV performs "Packed Conditional Move". 
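+//
+// Per the XOP reference this is a bitwise select: each destination bit comes
+// from the first source where the selector bit is 1, and from the second
+// source otherwise. Reading the reversed operand order used throughout this
+// file (destination last, selector first) gives, as a sketch with assumed
+// register names:
+//
+//     p.VPCMOV(XMM0, XMM1, XMM2, XMM3)  // xmm3 = (xmm2 & xmm0) | (xmm1 &^ xmm0)
+//
+// The operand mapping is an inference from the encoding below (v2 occupies
+// VEX.vvvv, v1 the ModRM r/m field, v0 the imm8[7:4] register).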
+// +// Mnemonic : VPCMOV +// Supported forms : (6 forms) +// +// * VPCMOV xmm, xmm, xmm, xmm [XOP] +// * VPCMOV m128, xmm, xmm, xmm [XOP] +// * VPCMOV xmm, m128, xmm, xmm [XOP] +// * VPCMOV ymm, ymm, ymm, ymm [XOP] +// * VPCMOV m256, ymm, ymm, ymm [XOP] +// * VPCMOV ymm, m256, ymm, ymm [XOP] +// +func (self *Program) VPCMOV(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMOV", 4, Operands { v0, v1, v2, v3 }) + // VPCMOV xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xa2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[2]) << 3)) + m.emit(0xa2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + } + // VPCMOV m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x80, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0xa2) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VPCMOV xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xa2) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPCMOV ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit(0xa2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfc ^ (hlcode(v[2]) << 3)) + m.emit(0xa2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + } + // VPCMOV m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x84, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0xa2) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VPCMOV ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x04, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xa2) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMOV") + } + return p +} + +// VPCMPB performs "Compare Packed Signed Byte Values". 
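+//
+// The imm8 operand selects the predicate; for the AVX-512 integer VPCMP*
+// family the low three bits encode: 0 = EQ, 1 = LT, 2 = LE, 3 = FALSE,
+// 4 = NE, 5 = NLT (>=), 6 = NLE (>), 7 = TRUE. For example (register and
+// mask names assumed):
+//
+//     p.VPCMPB(6, ZMM2, ZMM1, K1)  // k1[i] = signed bytes: zmm1[i] > zmm2[i]
+//
+// The first vector operand is the second comparison source (ModRM r/m) and
+// the second is the first source (EVEX.vvvv), matching the reversed order
+// of the form list below.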
+// +// Mnemonic : VPCMPB +// Supported forms : (6 forms) +// +// * VPCMPB imm8, zmm, zmm, k{k} [AVX512BW] +// * VPCMPB imm8, m512, zmm, k{k} [AVX512BW] +// * VPCMPB imm8, xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPB imm8, m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPB imm8, ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPCMPB imm8, m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPCMPB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMPB", 4, Operands { v0, v1, v2, v3 }) + // VPCMPB imm8, zmm, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPB imm8, m512, zmm, k{k} + if isImm8(v0) && isM512(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3f) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPB imm8, xmm, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPB imm8, m128, xmm, k{k} + if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3f) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPB imm8, ymm, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPB imm8, m256, ymm, k{k} + if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3f) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPB") + } + return p +} + +// VPCMPD performs "Compare Packed Signed Doubleword Values". 
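+//
+// Doubleword variant of the predicate compare above. The m512/m32bcst form
+// accepts a broadcast memory operand: bcode(v[1]) sets the EVEX.b bit so a
+// single 32-bit scalar is repeated across all lanes. A register-only sketch
+// (names assumed):
+//
+//     p.VPCMPD(1, ZMM8, ZMM9, K2)  // k2[i] = signed dwords: zmm9[i] < zmm8[i]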
+// +// Mnemonic : VPCMPD +// Supported forms : (6 forms) +// +// * VPCMPD imm8, m512/m32bcst, zmm, k{k} [AVX512F] +// * VPCMPD imm8, zmm, zmm, k{k} [AVX512F] +// * VPCMPD imm8, m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPD imm8, xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPD imm8, m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPCMPD imm8, ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPCMPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMPD", 4, Operands { v0, v1, v2, v3 }) + // VPCMPD imm8, m512/m32bcst, zmm, k{k} + if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1f) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPD imm8, zmm, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPD imm8, m128/m32bcst, xmm, k{k} + if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1f) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPD imm8, xmm, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPD imm8, m256/m32bcst, ymm, k{k} + if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1f) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPD imm8, ymm, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPD") + } + return p +} + +// VPCMPEQB performs "Compare Packed Byte Data for Equality". 
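+//
+// The operand types select the encoding: a vector destination picks the VEX
+// forms, which write a per-byte 0xFF/0x00 mask, while a K-register
+// destination picks the EVEX forms, which write one result bit per byte.
+// Sketch with assumed register names:
+//
+//     p.VPCMPEQB(XMM0, XMM1, XMM2)  // VEX: xmm2[i] = (xmm1[i] == xmm0[i]) ? 0xFF : 0
+//     p.VPCMPEQB(ZMM0, ZMM1, K1)    // EVEX: k1 receives 64 equality bits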
+// +// Mnemonic : VPCMPEQB +// Supported forms : (10 forms) +// +// * VPCMPEQB xmm, xmm, xmm [AVX] +// * VPCMPEQB m128, xmm, xmm [AVX] +// * VPCMPEQB ymm, ymm, ymm [AVX2] +// * VPCMPEQB m256, ymm, ymm [AVX2] +// * VPCMPEQB zmm, zmm, k{k} [AVX512BW] +// * VPCMPEQB m512, zmm, k{k} [AVX512BW] +// * VPCMPEQB xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPEQB m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPEQB ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPCMPEQB m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPCMPEQB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPEQB", 3, Operands { v0, v1, v2 }) + // VPCMPEQB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x74) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x74) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPEQB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x74) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x74) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPEQB zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x74) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQB m512, zmm, k{k} + if isM512(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x74) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPCMPEQB xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x74) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQB m128, xmm, k{k} + if isM128(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x74) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPCMPEQB ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + 
m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x74) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQB m256, ymm, k{k} + if isM256(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x74) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPEQB") + } + return p +} + +// VPCMPEQD performs "Compare Packed Doubleword Data for Equality". +// +// Mnemonic : VPCMPEQD +// Supported forms : (10 forms) +// +// * VPCMPEQD xmm, xmm, xmm [AVX] +// * VPCMPEQD m128, xmm, xmm [AVX] +// * VPCMPEQD ymm, ymm, ymm [AVX2] +// * VPCMPEQD m256, ymm, ymm [AVX2] +// * VPCMPEQD m512/m32bcst, zmm, k{k} [AVX512F] +// * VPCMPEQD zmm, zmm, k{k} [AVX512F] +// * VPCMPEQD m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPEQD xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPEQD m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPCMPEQD ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPCMPEQD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPEQD", 3, Operands { v0, v1, v2 }) + // VPCMPEQD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPEQD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPEQD m512/m32bcst, zmm, k{k} + if isM512M32bcst(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPCMPEQD zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQD m128/m32bcst, xmm, k{k} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 
ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPCMPEQD xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQD m256/m32bcst, ymm, k{k} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPCMPEQD ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPEQD") + } + return p +} + +// VPCMPEQQ performs "Compare Packed Quadword Data for Equality". +// +// Mnemonic : VPCMPEQQ +// Supported forms : (10 forms) +// +// * VPCMPEQQ xmm, xmm, xmm [AVX] +// * VPCMPEQQ m128, xmm, xmm [AVX] +// * VPCMPEQQ ymm, ymm, ymm [AVX2] +// * VPCMPEQQ m256, ymm, ymm [AVX2] +// * VPCMPEQQ m512/m64bcst, zmm, k{k} [AVX512F] +// * VPCMPEQQ zmm, zmm, k{k} [AVX512F] +// * VPCMPEQQ m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPEQQ xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPEQQ m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPCMPEQQ ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPCMPEQQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPEQQ", 3, Operands { v0, v1, v2 }) + // VPCMPEQQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x29) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x29) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPEQQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x29) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x29) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPEQQ m512/m64bcst, zmm, k{k} + 
if isM512M64bcst(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x29) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPCMPEQQ zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x29) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQQ m128/m64bcst, xmm, k{k} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x29) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPCMPEQQ xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x29) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQQ m256/m64bcst, ymm, k{k} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x29) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPCMPEQQ ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x29) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPEQQ") + } + return p +} + +// VPCMPEQW performs "Compare Packed Word Data for Equality". 
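+//
+// As with the other dispatchers in this file, each type guard below
+// registers an encoder via p.add, and a call whose operands match no guard
+// panics at the final p.len check. For instance (assuming RAX and the XMM
+// registers are the package's exported values):
+//
+//     p.VPCMPEQW(XMM0, XMM1, XMM2)  // ok: VEX.128 form
+//     p.VPCMPEQW(RAX, XMM1, XMM2)   // panics: "invalid operands for VPCMPEQW"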
+// +// Mnemonic : VPCMPEQW +// Supported forms : (10 forms) +// +// * VPCMPEQW xmm, xmm, xmm [AVX] +// * VPCMPEQW m128, xmm, xmm [AVX] +// * VPCMPEQW ymm, ymm, ymm [AVX2] +// * VPCMPEQW m256, ymm, ymm [AVX2] +// * VPCMPEQW zmm, zmm, k{k} [AVX512BW] +// * VPCMPEQW m512, zmm, k{k} [AVX512BW] +// * VPCMPEQW xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPEQW m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPEQW ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPCMPEQW m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPCMPEQW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPEQW", 3, Operands { v0, v1, v2 }) + // VPCMPEQW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPEQW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPEQW zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQW m512, zmm, k{k} + if isM512(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPCMPEQW xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQW m128, xmm, k{k} + if isM128(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPCMPEQW ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + 
m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQW m256, ymm, k{k} + if isM256(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPEQW") + } + return p +} + +// VPCMPESTRI performs "Packed Compare Explicit Length Strings, Return Index". +// +// Mnemonic : VPCMPESTRI +// Supported forms : (2 forms) +// +// * VPCMPESTRI imm8, xmm, xmm [AVX] +// * VPCMPESTRI imm8, m128, xmm [AVX] +// +func (self *Program) VPCMPESTRI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPESTRI", 3, Operands { v0, v1, v2 }) + // VPCMPESTRI imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0x61) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPESTRI imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0x61) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPESTRI") + } + return p +} + +// VPCMPESTRM performs "Packed Compare Explicit Length Strings, Return Mask". +// +// Mnemonic : VPCMPESTRM +// Supported forms : (2 forms) +// +// * VPCMPESTRM imm8, xmm, xmm [AVX] +// * VPCMPESTRM imm8, m128, xmm [AVX] +// +func (self *Program) VPCMPESTRM(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPESTRM", 3, Operands { v0, v1, v2 }) + // VPCMPESTRM imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0x60) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPESTRM imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0x60) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPESTRM") + } + return p +} + +// VPCMPGTB performs "Compare Packed Signed Byte Integers for Greater Than". 
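+//
+// A signed greater-than compare. A sketch of the AVX2 register form and the
+// m128 memory form; Ptr(base, disp) stands in for this package's memory
+// operand constructor, whose real name and signature are assumptions here:
+//
+//     p.VPCMPGTB(YMM4, YMM5, YMM6)         // ymm6[i] = (ymm5[i] > ymm4[i]) ? 0xFF : 0
+//     p.VPCMPGTB(Ptr(RSI, 0), XMM2, XMM3)  // m128 source, xmm destination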
+// +// Mnemonic : VPCMPGTB +// Supported forms : (10 forms) +// +// * VPCMPGTB xmm, xmm, xmm [AVX] +// * VPCMPGTB m128, xmm, xmm [AVX] +// * VPCMPGTB ymm, ymm, ymm [AVX2] +// * VPCMPGTB m256, ymm, ymm [AVX2] +// * VPCMPGTB zmm, zmm, k{k} [AVX512BW] +// * VPCMPGTB m512, zmm, k{k} [AVX512BW] +// * VPCMPGTB xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPGTB m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPGTB ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPCMPGTB m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPCMPGTB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPGTB", 3, Operands { v0, v1, v2 }) + // VPCMPGTB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPGTB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPGTB zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTB m512, zmm, k{k} + if isM512(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPCMPGTB xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTB m128, xmm, k{k} + if isM128(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPCMPGTB ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + 
m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTB m256, ymm, k{k} + if isM256(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPGTB") + } + return p +} + +// VPCMPGTD performs "Compare Packed Signed Doubleword Integers for Greater Than". +// +// Mnemonic : VPCMPGTD +// Supported forms : (10 forms) +// +// * VPCMPGTD xmm, xmm, xmm [AVX] +// * VPCMPGTD m128, xmm, xmm [AVX] +// * VPCMPGTD ymm, ymm, ymm [AVX2] +// * VPCMPGTD m256, ymm, ymm [AVX2] +// * VPCMPGTD m512/m32bcst, zmm, k{k} [AVX512F] +// * VPCMPGTD zmm, zmm, k{k} [AVX512F] +// * VPCMPGTD m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPGTD xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPGTD m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPCMPGTD ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPCMPGTD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPGTD", 3, Operands { v0, v1, v2 }) + // VPCMPGTD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPGTD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPGTD m512/m32bcst, zmm, k{k} + if isM512M32bcst(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPCMPGTD zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTD m128/m32bcst, xmm, k{k} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 
0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPCMPGTD xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTD m256/m32bcst, ymm, k{k} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPCMPGTD ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPGTD") + } + return p +} + +// VPCMPGTQ performs "Compare Packed Data for Greater Than". +// +// Mnemonic : VPCMPGTQ +// Supported forms : (10 forms) +// +// * VPCMPGTQ xmm, xmm, xmm [AVX] +// * VPCMPGTQ m128, xmm, xmm [AVX] +// * VPCMPGTQ ymm, ymm, ymm [AVX2] +// * VPCMPGTQ m256, ymm, ymm [AVX2] +// * VPCMPGTQ m512/m64bcst, zmm, k{k} [AVX512F] +// * VPCMPGTQ zmm, zmm, k{k} [AVX512F] +// * VPCMPGTQ m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPGTQ xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPGTQ m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPCMPGTQ ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPCMPGTQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPGTQ", 3, Operands { v0, v1, v2 }) + // VPCMPGTQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x37) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x37) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPGTQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x37) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x37) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPGTQ m512/m64bcst, zmm, 
k{k} + if isM512M64bcst(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x37) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPCMPGTQ zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x37) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTQ m128/m64bcst, xmm, k{k} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x37) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPCMPGTQ xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x37) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTQ m256/m64bcst, ymm, k{k} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x37) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPCMPGTQ ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x37) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPGTQ") + } + return p +} + +// VPCMPGTW performs "Compare Packed Signed Word Integers for Greater Than". 
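+//
+// Like the other VPCMPGT* dispatchers this compares signed values; there is
+// no unsigned greater-than mnemonic, so unsigned word comparisons go through
+// the predicate-based VPCMPU* family instead. Sketch (names assumed):
+//
+//     p.VPCMPGTW(ZMM2, ZMM1, K3)  // k3[i] = signed words: zmm1[i] > zmm2[i]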
+// +// Mnemonic : VPCMPGTW +// Supported forms : (10 forms) +// +// * VPCMPGTW xmm, xmm, xmm [AVX] +// * VPCMPGTW m128, xmm, xmm [AVX] +// * VPCMPGTW ymm, ymm, ymm [AVX2] +// * VPCMPGTW m256, ymm, ymm [AVX2] +// * VPCMPGTW zmm, zmm, k{k} [AVX512BW] +// * VPCMPGTW m512, zmm, k{k} [AVX512BW] +// * VPCMPGTW xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPGTW m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPGTW ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPCMPGTW m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPCMPGTW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPGTW", 3, Operands { v0, v1, v2 }) + // VPCMPGTW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPGTW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPGTW zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTW m512, zmm, k{k} + if isM512(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPCMPGTW xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTW m128, xmm, k{k} + if isM128(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPCMPGTW ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + 
m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTW m256, ymm, k{k} + if isM256(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPGTW") + } + return p +} + +// VPCMPISTRI performs "Packed Compare Implicit Length Strings, Return Index". +// +// Mnemonic : VPCMPISTRI +// Supported forms : (2 forms) +// +// * VPCMPISTRI imm8, xmm, xmm [AVX] +// * VPCMPISTRI imm8, m128, xmm [AVX] +// +func (self *Program) VPCMPISTRI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPISTRI", 3, Operands { v0, v1, v2 }) + // VPCMPISTRI imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0x63) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPISTRI imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0x63) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPISTRI") + } + return p +} + +// VPCMPISTRM performs "Packed Compare Implicit Length Strings, Return Mask". +// +// Mnemonic : VPCMPISTRM +// Supported forms : (2 forms) +// +// * VPCMPISTRM imm8, xmm, xmm [AVX] +// * VPCMPISTRM imm8, m128, xmm [AVX] +// +func (self *Program) VPCMPISTRM(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPISTRM", 3, Operands { v0, v1, v2 }) + // VPCMPISTRM imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0x62) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPISTRM imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0x62) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPISTRM") + } + return p +} + +// VPCMPQ performs "Compare Packed Signed Quadword Values". 
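+//
+// With predicate 6 (NLE) this computes the same signed greater-than relation
+// as VPCMPGTQ above, writing a K-register mask. Sketch (names assumed):
+//
+//     p.VPCMPQ(6, ZMM2, ZMM1, K1)  // same relation as:
+//     p.VPCMPGTQ(ZMM2, ZMM1, K1)   // k1[i] = signed qwords: zmm1[i] > zmm2[i]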
+// +// Mnemonic : VPCMPQ +// Supported forms : (6 forms) +// +// * VPCMPQ imm8, m512/m64bcst, zmm, k{k} [AVX512F] +// * VPCMPQ imm8, zmm, zmm, k{k} [AVX512F] +// * VPCMPQ imm8, m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPQ imm8, xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPQ imm8, m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPCMPQ imm8, ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPCMPQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMPQ", 4, Operands { v0, v1, v2, v3 }) + // VPCMPQ imm8, m512/m64bcst, zmm, k{k} + if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1f) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPQ imm8, zmm, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPQ imm8, m128/m64bcst, xmm, k{k} + if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1f) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPQ imm8, xmm, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPQ imm8, m256/m64bcst, ymm, k{k} + if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1f) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPQ imm8, ymm, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPQ") + } + return p +} + +// VPCMPUB performs "Compare Packed Unsigned Byte Values". 
+// +// Mnemonic : VPCMPUB +// Supported forms : (6 forms) +// +// * VPCMPUB imm8, zmm, zmm, k{k} [AVX512BW] +// * VPCMPUB imm8, m512, zmm, k{k} [AVX512BW] +// * VPCMPUB imm8, xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPUB imm8, m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPUB imm8, ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPCMPUB imm8, m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPCMPUB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMPUB", 4, Operands { v0, v1, v2, v3 }) + // VPCMPUB imm8, zmm, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUB imm8, m512, zmm, k{k} + if isImm8(v0) && isM512(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3e) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUB imm8, xmm, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUB imm8, m128, xmm, k{k} + if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3e) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUB imm8, ymm, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUB imm8, m256, ymm, k{k} + if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3e) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPUB") + } + return p +} + +// VPCMPUD performs "Compare Packed Unsigned Doubleword Values". 
+// +// Mnemonic : VPCMPUD +// Supported forms : (6 forms) +// +// * VPCMPUD imm8, m512/m32bcst, zmm, k{k} [AVX512F] +// * VPCMPUD imm8, zmm, zmm, k{k} [AVX512F] +// * VPCMPUD imm8, m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPUD imm8, xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPUD imm8, m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPCMPUD imm8, ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPCMPUD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMPUD", 4, Operands { v0, v1, v2, v3 }) + // VPCMPUD imm8, m512/m32bcst, zmm, k{k} + if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1e) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUD imm8, zmm, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUD imm8, m128/m32bcst, xmm, k{k} + if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1e) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUD imm8, xmm, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUD imm8, m256/m32bcst, ymm, k{k} + if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1e) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUD imm8, ymm, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPUD") + } + return p +} + +// VPCMPUQ performs "Compare Packed Unsigned Quadword Values". 
+// +// Mnemonic : VPCMPUQ +// Supported forms : (6 forms) +// +// * VPCMPUQ imm8, m512/m64bcst, zmm, k{k} [AVX512F] +// * VPCMPUQ imm8, zmm, zmm, k{k} [AVX512F] +// * VPCMPUQ imm8, m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPUQ imm8, xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPUQ imm8, m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPCMPUQ imm8, ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPCMPUQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMPUQ", 4, Operands { v0, v1, v2, v3 }) + // VPCMPUQ imm8, m512/m64bcst, zmm, k{k} + if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1e) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUQ imm8, zmm, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUQ imm8, m128/m64bcst, xmm, k{k} + if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1e) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUQ imm8, xmm, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUQ imm8, m256/m64bcst, ymm, k{k} + if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1e) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUQ imm8, ymm, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPUQ") + } + return p +} + +// VPCMPUW performs "Compare Packed Unsigned Word Values". 
+// +// Mnemonic : VPCMPUW +// Supported forms : (6 forms) +// +// * VPCMPUW imm8, zmm, zmm, k{k} [AVX512BW] +// * VPCMPUW imm8, m512, zmm, k{k} [AVX512BW] +// * VPCMPUW imm8, xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPUW imm8, m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPUW imm8, ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPCMPUW imm8, m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPCMPUW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMPUW", 4, Operands { v0, v1, v2, v3 }) + // VPCMPUW imm8, zmm, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUW imm8, m512, zmm, k{k} + if isImm8(v0) && isM512(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3e) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUW imm8, xmm, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUW imm8, m128, xmm, k{k} + if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3e) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUW imm8, ymm, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUW imm8, m256, ymm, k{k} + if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3e) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPUW") + } + return p +} + +// VPCMPW performs "Compare Packed Signed Word Values". 
+// +// Mnemonic : VPCMPW +// Supported forms : (6 forms) +// +// * VPCMPW imm8, zmm, zmm, k{k} [AVX512BW] +// * VPCMPW imm8, m512, zmm, k{k} [AVX512BW] +// * VPCMPW imm8, xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPW imm8, m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPW imm8, ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPCMPW imm8, m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPCMPW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMPW", 4, Operands { v0, v1, v2, v3 }) + // VPCMPW imm8, zmm, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPW imm8, m512, zmm, k{k} + if isImm8(v0) && isM512(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3f) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPW imm8, xmm, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPW imm8, m128, xmm, k{k} + if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3f) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPW imm8, ymm, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPW imm8, m256, ymm, k{k} + if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3f) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPW") + } + return p +} + +// VPCOMB performs "Compare Packed Signed Byte Integers". 
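+//
+// Usage sketch (illustrative; names assumed as in the VPCMPQ sketch above —
+// XOP predicate 2 selects "greater than"):
+//
+//     p.VPCOMB(2, XMM1, XMM2, XMM3)   // XMM3[i] = (XMM2[i] > XMM1[i]) ? 0xff : 0x00, signed bytes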
+// +// Mnemonic : VPCOMB +// Supported forms : (2 forms) +// +// * VPCOMB imm8, xmm, xmm, xmm [XOP] +// * VPCOMB imm8, m128, xmm, xmm [XOP] +// +func (self *Program) VPCOMB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCOMB", 4, Operands { v0, v1, v2, v3 }) + // VPCOMB imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xcc) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCOMB imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xcc) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMB") + } + return p +} + +// VPCOMD performs "Compare Packed Signed Doubleword Integers". +// +// Mnemonic : VPCOMD +// Supported forms : (2 forms) +// +// * VPCOMD imm8, xmm, xmm, xmm [XOP] +// * VPCOMD imm8, m128, xmm, xmm [XOP] +// +func (self *Program) VPCOMD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCOMD", 4, Operands { v0, v1, v2, v3 }) + // VPCOMD imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xce) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCOMD imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xce) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMD") + } + return p +} + +// VPCOMPRESSD performs "Store Sparse Packed Doubleword Integer Values into Dense Memory/Register". 
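+//
+// Usage sketch (illustrative; with no write mask the compression degenerates
+// into a copy — a {k}-masked destination, not shown here, is what packs the
+// unmasked doublewords densely):
+//
+//     p.VPCOMPRESSD(ZMM1, ZMM2)   // no mask: every dword passes, so ZMM2 = ZMM1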
+// +// Mnemonic : VPCOMPRESSD +// Supported forms : (6 forms) +// +// * VPCOMPRESSD zmm, zmm{k}{z} [AVX512F] +// * VPCOMPRESSD zmm, m512{k}{z} [AVX512F] +// * VPCOMPRESSD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPCOMPRESSD xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VPCOMPRESSD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPCOMPRESSD ymm, m256{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPCOMPRESSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPCOMPRESSD", 2, Operands { v0, v1 }) + // VPCOMPRESSD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPCOMPRESSD zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8b) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPCOMPRESSD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPCOMPRESSD xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8b) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPCOMPRESSD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPCOMPRESSD ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8b) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMPRESSD") + } + return p +} + +// VPCOMPRESSQ performs "Store Sparse Packed Quadword Integer Values into Dense Memory/Register". 
+// +// Mnemonic : VPCOMPRESSQ +// Supported forms : (6 forms) +// +// * VPCOMPRESSQ zmm, zmm{k}{z} [AVX512F] +// * VPCOMPRESSQ zmm, m512{k}{z} [AVX512F] +// * VPCOMPRESSQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPCOMPRESSQ xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VPCOMPRESSQ ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPCOMPRESSQ ymm, m256{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPCOMPRESSQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPCOMPRESSQ", 2, Operands { v0, v1 }) + // VPCOMPRESSQ zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPCOMPRESSQ zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8b) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPCOMPRESSQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPCOMPRESSQ xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8b) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPCOMPRESSQ ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPCOMPRESSQ ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8b) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMPRESSQ") + } + return p +} + +// VPCOMQ performs "Compare Packed Signed Quadword Integers". 
+// +// Mnemonic : VPCOMQ +// Supported forms : (2 forms) +// +// * VPCOMQ imm8, xmm, xmm, xmm [XOP] +// * VPCOMQ imm8, m128, xmm, xmm [XOP] +// +func (self *Program) VPCOMQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCOMQ", 4, Operands { v0, v1, v2, v3 }) + // VPCOMQ imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xcf) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCOMQ imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xcf) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMQ") + } + return p +} + +// VPCOMUB performs "Compare Packed Unsigned Byte Integers". +// +// Mnemonic : VPCOMUB +// Supported forms : (2 forms) +// +// * VPCOMUB imm8, xmm, xmm, xmm [XOP] +// * VPCOMUB imm8, m128, xmm, xmm [XOP] +// +func (self *Program) VPCOMUB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCOMUB", 4, Operands { v0, v1, v2, v3 }) + // VPCOMUB imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xec) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCOMUB imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xec) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMUB") + } + return p +} + +// VPCOMUD performs "Compare Packed Unsigned Doubleword Integers". 
+// +// Mnemonic : VPCOMUD +// Supported forms : (2 forms) +// +// * VPCOMUD imm8, xmm, xmm, xmm [XOP] +// * VPCOMUD imm8, m128, xmm, xmm [XOP] +// +func (self *Program) VPCOMUD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCOMUD", 4, Operands { v0, v1, v2, v3 }) + // VPCOMUD imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xee) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCOMUD imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xee) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMUD") + } + return p +} + +// VPCOMUQ performs "Compare Packed Unsigned Quadword Integers". +// +// Mnemonic : VPCOMUQ +// Supported forms : (2 forms) +// +// * VPCOMUQ imm8, xmm, xmm, xmm [XOP] +// * VPCOMUQ imm8, m128, xmm, xmm [XOP] +// +func (self *Program) VPCOMUQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCOMUQ", 4, Operands { v0, v1, v2, v3 }) + // VPCOMUQ imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xef) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCOMUQ imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xef) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMUQ") + } + return p +} + +// VPCOMUW performs "Compare Packed Unsigned Word Integers". 
+// +// Mnemonic : VPCOMUW +// Supported forms : (2 forms) +// +// * VPCOMUW imm8, xmm, xmm, xmm [XOP] +// * VPCOMUW imm8, m128, xmm, xmm [XOP] +// +func (self *Program) VPCOMUW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCOMUW", 4, Operands { v0, v1, v2, v3 }) + // VPCOMUW imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xed) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCOMUW imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xed) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMUW") + } + return p +} + +// VPCOMW performs "Compare Packed Signed Word Integers". +// +// Mnemonic : VPCOMW +// Supported forms : (2 forms) +// +// * VPCOMW imm8, xmm, xmm, xmm [XOP] +// * VPCOMW imm8, m128, xmm, xmm [XOP] +// +func (self *Program) VPCOMW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCOMW", 4, Operands { v0, v1, v2, v3 }) + // VPCOMW imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xcd) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCOMW imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xcd) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMW") + } + return p +} + +// VPCONFLICTD performs "Detect Conflicts Within a Vector of Packed Doubleword Values into Dense Memory/Register". 
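+//
+// Usage sketch (illustrative; names assumed as above). Each destination
+// doubleword receives a bitmap of the lower-indexed source lanes that hold
+// the same value, the primitive behind scatter conflict detection:
+//
+//     p.VPCONFLICTD(ZMM1, ZMM2)   // ZMM2[i] = bitmask{ j < i : ZMM1[j] == ZMM1[i] }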
+// +// Mnemonic : VPCONFLICTD +// Supported forms : (6 forms) +// +// * VPCONFLICTD m128/m32bcst, xmm{k}{z} [AVX512CD,AVX512VL] +// * VPCONFLICTD m256/m32bcst, ymm{k}{z} [AVX512CD,AVX512VL] +// * VPCONFLICTD m512/m32bcst, zmm{k}{z} [AVX512CD] +// * VPCONFLICTD xmm, xmm{k}{z} [AVX512CD,AVX512VL] +// * VPCONFLICTD ymm, ymm{k}{z} [AVX512CD,AVX512VL] +// * VPCONFLICTD zmm, zmm{k}{z} [AVX512CD] +// +func (self *Program) VPCONFLICTD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPCONFLICTD", 2, Operands { v0, v1 }) + // VPCONFLICTD m128/m32bcst, xmm{k}{z} + if isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xc4) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPCONFLICTD m256/m32bcst, ymm{k}{z} + if isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xc4) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPCONFLICTD m512/m32bcst, zmm{k}{z} + if isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xc4) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPCONFLICTD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPCONFLICTD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPCONFLICTD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCONFLICTD") + } + return p +} + +// VPCONFLICTQ performs "Detect Conflicts Within a Vector of Packed Quadword Values into Dense Memory/Register". 
+// +// Mnemonic : VPCONFLICTQ +// Supported forms : (6 forms) +// +// * VPCONFLICTQ m128/m64bcst, xmm{k}{z} [AVX512CD,AVX512VL] +// * VPCONFLICTQ m256/m64bcst, ymm{k}{z} [AVX512CD,AVX512VL] +// * VPCONFLICTQ m512/m64bcst, zmm{k}{z} [AVX512CD] +// * VPCONFLICTQ xmm, xmm{k}{z} [AVX512CD,AVX512VL] +// * VPCONFLICTQ ymm, ymm{k}{z} [AVX512CD,AVX512VL] +// * VPCONFLICTQ zmm, zmm{k}{z} [AVX512CD] +// +func (self *Program) VPCONFLICTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPCONFLICTQ", 2, Operands { v0, v1 }) + // VPCONFLICTQ m128/m64bcst, xmm{k}{z} + if isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xc4) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPCONFLICTQ m256/m64bcst, ymm{k}{z} + if isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xc4) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPCONFLICTQ m512/m64bcst, zmm{k}{z} + if isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xc4) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPCONFLICTQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPCONFLICTQ ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPCONFLICTQ zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCONFLICTQ") + } + return p +} + +// VPERM2F128 performs "Permute Floating-Point Values". 
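+//
+// Usage sketch (illustrative; per the forms above the vvvv register YMM2 is
+// SRC1 and YMM1 is SRC2, and imm8 0x20 selects the low 128-bit lane of each):
+//
+//     p.VPERM2F128(0x20, YMM1, YMM2, YMM3)   // YMM3 = { hi: YMM1[127:0], lo: YMM2[127:0] }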
+// +// Mnemonic : VPERM2F128 +// Supported forms : (2 forms) +// +// * VPERM2F128 imm8, ymm, ymm, ymm [AVX] +// * VPERM2F128 imm8, m256, ymm, ymm [AVX] +// +func (self *Program) VPERM2F128(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPERM2F128", 4, Operands { v0, v1, v2, v3 }) + // VPERM2F128 imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x06) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERM2F128 imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x06) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERM2F128") + } + return p +} + +// VPERM2I128 performs "Permute 128-Bit Integer Values". +// +// Mnemonic : VPERM2I128 +// Supported forms : (2 forms) +// +// * VPERM2I128 imm8, ymm, ymm, ymm [AVX2] +// * VPERM2I128 imm8, m256, ymm, ymm [AVX2] +// +func (self *Program) VPERM2I128(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPERM2I128", 4, Operands { v0, v1, v2, v3 }) + // VPERM2I128 imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x46) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERM2I128 imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x46) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERM2I128") + } + return p +} + +// VPERMB performs "Permute Byte Integers". 
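+//
+// Usage sketch (illustrative; per the forms above the second register holds
+// the byte indices and the first the table):
+//
+//     p.VPERMB(ZMM1, ZMM2, ZMM3)   // ZMM3[i] = ZMM1[ZMM2[i] & 63] for each byte lane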
+// +// Mnemonic : VPERMB +// Supported forms : (6 forms) +// +// * VPERMB xmm, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMB m128, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMB ymm, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMB m256, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMB zmm, zmm, zmm{k}{z} [AVX512VBMI] +// * VPERMB m512, zmm, zmm{k}{z} [AVX512VBMI] +// +func (self *Program) VPERMB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMB", 3, Operands { v0, v1, v2 }) + // VPERMB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x8d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x8d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x8d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x8d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x8d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x8d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMB") + } + return p +} + +// VPERMD performs "Permute Doubleword Integers". 
+// +// Mnemonic : VPERMD +// Supported forms : (6 forms) +// +// * VPERMD ymm, ymm, ymm [AVX2] +// * VPERMD m256, ymm, ymm [AVX2] +// * VPERMD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMD", 3, Operands { v0, v1, v2 }) + // VPERMD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x36) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x36) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPERMD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x36) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x36) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x36) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x36) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMD") + } + return p +} + +// VPERMI2B performs "Full Permute of Bytes From Two Tables Overwriting the Index". 
+// +// Mnemonic : VPERMI2B +// Supported forms : (6 forms) +// +// * VPERMI2B xmm, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMI2B m128, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMI2B ymm, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMI2B m256, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMI2B zmm, zmm, zmm{k}{z} [AVX512VBMI] +// * VPERMI2B m512, zmm, zmm{k}{z} [AVX512VBMI] +// +func (self *Program) VPERMI2B(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMI2B", 3, Operands { v0, v1, v2 }) + // VPERMI2B xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2B m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMI2B ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2B m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMI2B zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2B m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMI2B") + } + return p +} + +// VPERMI2D performs "Full Permute of Doublewords From Two Tables Overwriting the Index". 
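+//
+// Usage sketch (illustrative; the destination doubles as the index source,
+// so ZMM3 supplies the selectors and is overwritten):
+//
+//     p.VPERMI2D(ZMM1, ZMM2, ZMM3)   // ZMM3[i] = T[ZMM3[i] & 31], T = ZMM2 (entries 0-15) then ZMM1 (16-31)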
+// +// Mnemonic : VPERMI2D +// Supported forms : (6 forms) +// +// * VPERMI2D m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMI2D zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMI2D m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2D xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2D m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2D ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMI2D(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMI2D", 3, Operands { v0, v1, v2 }) + // VPERMI2D m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMI2D zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2D m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMI2D xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2D m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMI2D ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMI2D") + } + return p +} + +// VPERMI2PD performs "Full Permute of Double-Precision Floating-Point Values From Two Tables Overwriting the Index". 
+// +// Mnemonic : VPERMI2PD +// Supported forms : (6 forms) +// +// * VPERMI2PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMI2PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMI2PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMI2PD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMI2PD", 3, Operands { v0, v1, v2 }) + // VPERMI2PD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x77) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMI2PD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x77) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2PD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x77) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMI2PD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x77) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2PD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x77) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMI2PD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x77) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMI2PD") + } + return p +} + +// VPERMI2PS performs "Full Permute of Single-Precision Floating-Point Values From Two Tables Overwriting the Index". 
+// +// Mnemonic : VPERMI2PS +// Supported forms : (6 forms) +// +// * VPERMI2PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMI2PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMI2PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMI2PS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMI2PS", 3, Operands { v0, v1, v2 }) + // VPERMI2PS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x77) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMI2PS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x77) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2PS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x77) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMI2PS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x77) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2PS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x77) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMI2PS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x77) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMI2PS") + } + return p +} + +// VPERMI2Q performs "Full Permute of Quadwords From Two Tables Overwriting the Index". 
+// +// Mnemonic : VPERMI2Q +// Supported forms : (6 forms) +// +// * VPERMI2Q m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMI2Q zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMI2Q m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2Q xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2Q m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2Q ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMI2Q(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMI2Q", 3, Operands { v0, v1, v2 }) + // VPERMI2Q m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMI2Q zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2Q m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMI2Q xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2Q m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMI2Q ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMI2Q") + } + return p +} + +// VPERMI2W performs "Full Permute of Words From Two Tables Overwriting the Index". 
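+//
+// Usage sketch; note that the word-granularity forms below require AVX512BW
+// rather than AVX512F:
+//
+//     p.VPERMI2W(ZMM2, ZMM1, ZMM0)    // word indices in ZMM0, permuted words out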
+// +// Mnemonic : VPERMI2W +// Supported forms : (6 forms) +// +// * VPERMI2W zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPERMI2W m512, zmm, zmm{k}{z} [AVX512BW] +// * VPERMI2W xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPERMI2W m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPERMI2W ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPERMI2W m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPERMI2W(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMI2W", 3, Operands { v0, v1, v2 }) + // VPERMI2W zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2W m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMI2W xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2W m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMI2W ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2W m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMI2W") + } + return p +} + +// VPERMIL2PD performs "Permute Two-Source Double-Precision Floating-Point Vectors". 
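+//
+// Usage sketch for the five-operand XOP form. The leading immediate is the
+// 4-bit "m2z" selector (0 keeps all elements), and, judging from the
+// encodings below, the last register operand receives the result:
+//
+//     p.VPERMIL2PD(0, XMM3, XMM2, XMM1, XMM0)    // XMM0 <- two-source permute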
+// +// Mnemonic : VPERMIL2PD +// Supported forms : (6 forms) +// +// * VPERMIL2PD imm4, xmm, xmm, xmm, xmm [XOP] +// * VPERMIL2PD imm4, m128, xmm, xmm, xmm [XOP] +// * VPERMIL2PD imm4, xmm, m128, xmm, xmm [XOP] +// * VPERMIL2PD imm4, ymm, ymm, ymm, ymm [XOP] +// * VPERMIL2PD imm4, m256, ymm, ymm, ymm [XOP] +// * VPERMIL2PD imm4, ymm, m256, ymm, ymm [XOP] +// +func (self *Program) VPERMIL2PD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, v4 interface{}) *Instruction { + p := self.alloc("VPERMIL2PD", 5, Operands { v0, v1, v2, v3, v4 }) + // VPERMIL2PD imm4, xmm, xmm, xmm, xmm + if isImm4(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) && isXMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x79 ^ (hlcode(v[3]) << 3)) + m.emit(0x49) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.emit((hlcode(v[1]) << 4) | imml(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xf9 ^ (hlcode(v[3]) << 3)) + m.emit(0x49) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[1])) + m.emit((hlcode(v[2]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PD imm4, m128, xmm, xmm, xmm + if isImm4(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) && isXMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[4]), addr(v[1]), hlcode(v[3])) + m.emit(0x49) + m.mrsd(lcode(v[4]), addr(v[1]), 1) + m.emit((hlcode(v[2]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PD imm4, xmm, m128, xmm, xmm + if isImm4(v0) && isXMM(v1) && isM128(v2) && isXMM(v3) && isXMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[4]), addr(v[2]), hlcode(v[3])) + m.emit(0x49) + m.mrsd(lcode(v[4]), addr(v[2]), 1) + m.emit((hlcode(v[1]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PD imm4, ymm, ymm, ymm, ymm + if isImm4(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) && isYMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x7d ^ (hlcode(v[3]) << 3)) + m.emit(0x49) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.emit((hlcode(v[1]) << 4) | imml(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[3]) << 3)) + m.emit(0x49) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[1])) + m.emit((hlcode(v[2]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PD imm4, m256, ymm, ymm, ymm + if isImm4(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) && isYMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[4]), addr(v[1]), hlcode(v[3])) + m.emit(0x49) + m.mrsd(lcode(v[4]), addr(v[1]), 1) + m.emit((hlcode(v[2]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PD imm4, ymm, m256, ymm, ymm + if isImm4(v0) && isYMM(v1) && isM256(v2) && isYMM(v3) && isYMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[4]), addr(v[2]), hlcode(v[3])) + m.emit(0x49) + m.mrsd(lcode(v[4]), addr(v[2]), 1) + m.emit((hlcode(v[1]) << 4) | imml(v[0])) + }) + } + if p.len == 0 { + 
panic("invalid operands for VPERMIL2PD") + } + return p +} + +// VPERMIL2PS performs "Permute Two-Source Single-Precision Floating-Point Vectors". +// +// Mnemonic : VPERMIL2PS +// Supported forms : (6 forms) +// +// * VPERMIL2PS imm4, xmm, xmm, xmm, xmm [XOP] +// * VPERMIL2PS imm4, m128, xmm, xmm, xmm [XOP] +// * VPERMIL2PS imm4, xmm, m128, xmm, xmm [XOP] +// * VPERMIL2PS imm4, ymm, ymm, ymm, ymm [XOP] +// * VPERMIL2PS imm4, m256, ymm, ymm, ymm [XOP] +// * VPERMIL2PS imm4, ymm, m256, ymm, ymm [XOP] +// +func (self *Program) VPERMIL2PS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, v4 interface{}) *Instruction { + p := self.alloc("VPERMIL2PS", 5, Operands { v0, v1, v2, v3, v4 }) + // VPERMIL2PS imm4, xmm, xmm, xmm, xmm + if isImm4(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) && isXMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x79 ^ (hlcode(v[3]) << 3)) + m.emit(0x48) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.emit((hlcode(v[1]) << 4) | imml(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xf9 ^ (hlcode(v[3]) << 3)) + m.emit(0x48) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[1])) + m.emit((hlcode(v[2]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PS imm4, m128, xmm, xmm, xmm + if isImm4(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) && isXMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[4]), addr(v[1]), hlcode(v[3])) + m.emit(0x48) + m.mrsd(lcode(v[4]), addr(v[1]), 1) + m.emit((hlcode(v[2]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PS imm4, xmm, m128, xmm, xmm + if isImm4(v0) && isXMM(v1) && isM128(v2) && isXMM(v3) && isXMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[4]), addr(v[2]), hlcode(v[3])) + m.emit(0x48) + m.mrsd(lcode(v[4]), addr(v[2]), 1) + m.emit((hlcode(v[1]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PS imm4, ymm, ymm, ymm, ymm + if isImm4(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) && isYMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x7d ^ (hlcode(v[3]) << 3)) + m.emit(0x48) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.emit((hlcode(v[1]) << 4) | imml(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[3]) << 3)) + m.emit(0x48) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[1])) + m.emit((hlcode(v[2]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PS imm4, m256, ymm, ymm, ymm + if isImm4(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) && isYMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[4]), addr(v[1]), hlcode(v[3])) + m.emit(0x48) + m.mrsd(lcode(v[4]), addr(v[1]), 1) + m.emit((hlcode(v[2]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PS imm4, ymm, m256, ymm, ymm + if isImm4(v0) && isYMM(v1) && isM256(v2) && isYMM(v3) && isYMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[4]), addr(v[2]), 
hlcode(v[3])) + m.emit(0x48) + m.mrsd(lcode(v[4]), addr(v[2]), 1) + m.emit((hlcode(v[1]) << 4) | imml(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMIL2PS") + } + return p +} + +// VPERMILPD performs "Permute Double-Precision Floating-Point Values". +// +// Mnemonic : VPERMILPD +// Supported forms : (20 forms) +// +// * VPERMILPD imm8, xmm, xmm [AVX] +// * VPERMILPD xmm, xmm, xmm [AVX] +// * VPERMILPD m128, xmm, xmm [AVX] +// * VPERMILPD imm8, m128, xmm [AVX] +// * VPERMILPD imm8, ymm, ymm [AVX] +// * VPERMILPD ymm, ymm, ymm [AVX] +// * VPERMILPD m256, ymm, ymm [AVX] +// * VPERMILPD imm8, m256, ymm [AVX] +// * VPERMILPD imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPERMILPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMILPD imm8, zmm, zmm{k}{z} [AVX512F] +// * VPERMILPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMILPD imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPD imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMILPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMILPD", 3, Operands { v0, v1, v2 }) + // VPERMILPD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0x05) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x0d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMILPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x0d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPERMILPD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0x05) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d) + m.emit(0x05) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x0d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMILPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && 
isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x0d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPERMILPD imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[2]), addr(v[1]), 0) + m.emit(0x05) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD imm8, m512/m64bcst, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x05) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x0d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMILPD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x05) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x0d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMILPD imm8, m128/m64bcst, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x05) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD imm8, m256/m64bcst, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x05) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x0d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMILPD imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, 
v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x05) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x0d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMILPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x0d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMILPD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x05) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x0d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMILPD") + } + return p +} + +// VPERMILPS performs "Permute Single-Precision Floating-Point Values". 
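+//
+// Usage sketch: immediates are passed as plain Go integers, which the
+// isImm8/toImmAny helpers in this file appear to accept. For example,
+// 0b10110001 swaps adjacent singles within each 128-bit lane:
+//
+//     p.VPERMILPS(0b10110001, XMM1, XMM0)    // XMM0 <- XMM1 with element pairs swapped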
+// +// Mnemonic : VPERMILPS +// Supported forms : (20 forms) +// +// * VPERMILPS imm8, xmm, xmm [AVX] +// * VPERMILPS xmm, xmm, xmm [AVX] +// * VPERMILPS m128, xmm, xmm [AVX] +// * VPERMILPS imm8, m128, xmm [AVX] +// * VPERMILPS imm8, ymm, ymm [AVX] +// * VPERMILPS ymm, ymm, ymm [AVX] +// * VPERMILPS m256, ymm, ymm [AVX] +// * VPERMILPS imm8, m256, ymm [AVX] +// * VPERMILPS imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VPERMILPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMILPS imm8, zmm, zmm{k}{z} [AVX512F] +// * VPERMILPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMILPS imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPS imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPS imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPS imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMILPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMILPS", 3, Operands { v0, v1, v2 }) + // VPERMILPS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x0c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMILPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x0c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPERMILPS imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x0c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMILPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x0c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPERMILPS imm8, m256, ymm + if 
isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[2]), addr(v[1]), 0) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS imm8, m512/m32bcst, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x0c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMILPS imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x0c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMILPS imm8, m128/m32bcst, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS imm8, m256/m32bcst, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x0c) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMILPS imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + 
}) + } + // VPERMILPS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x0c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMILPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x0c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMILPS imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x0c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMILPS") + } + return p +} + +// VPERMPD performs "Permute Double-Precision Floating-Point Elements". 
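+//
+// Usage sketch: with the imm8 form, each 2-bit field of the immediate picks a
+// source quadword, so 0b00011011 reverses the four quadwords:
+//
+//     p.VPERMPD(0b00011011, YMM1, YMM0)    // YMM0 <- YMM1 with quadwords reversed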
+// +// Mnemonic : VPERMPD +// Supported forms : (10 forms) +// +// * VPERMPD imm8, ymm, ymm [AVX2] +// * VPERMPD imm8, m256, ymm [AVX2] +// * VPERMPD imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPERMPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMPD imm8, zmm, zmm{k}{z} [AVX512F] +// * VPERMPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMPD imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMPD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMPD", 3, Operands { v0, v1, v2 }) + // VPERMPD imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xfd) + m.emit(0x01) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMPD imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[2]), addr(v[1]), 0) + m.emit(0x01) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMPD imm8, m512/m64bcst, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x01) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMPD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x01) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x16) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMPD imm8, m256/m64bcst, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x01) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | 
ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMPD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x01) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x16) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMPD") + } + return p +} + +// VPERMPS performs "Permute Single-Precision Floating-Point Elements". +// +// Mnemonic : VPERMPS +// Supported forms : (6 forms) +// +// * VPERMPS ymm, ymm, ymm [AVX2] +// * VPERMPS m256, ymm, ymm [AVX2] +// * VPERMPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMPS", 3, Operands { v0, v1, v2 }) + // VPERMPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x16) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPERMPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x16) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 
0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x16) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMPS") + } + return p +} + +// VPERMQ performs "Permute Quadword Integers". +// +// Mnemonic : VPERMQ +// Supported forms : (10 forms) +// +// * VPERMQ imm8, ymm, ymm [AVX2] +// * VPERMQ imm8, m256, ymm [AVX2] +// * VPERMQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPERMQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMQ imm8, zmm, zmm{k}{z} [AVX512F] +// * VPERMQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMQ", 3, Operands { v0, v1, v2 }) + // VPERMQ imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xfd) + m.emit(0x00) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMQ imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[2]), addr(v[1]), 0) + m.emit(0x00) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMQ imm8, m512/m64bcst, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x00) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x36) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMQ imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x00) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) 
| (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x36) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMQ imm8, m256/m64bcst, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x00) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x36) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMQ imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x00) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x36) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMQ") + } + return p +} + +// VPERMT2B performs "Full Permute of Bytes From Two Tables Overwriting a Table". 
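+//
+// Usage sketch: unlike the VPERMI2* family, the destination doubles as the
+// first table and is overwritten, with the indices in the middle operand;
+// note the AVX512VBMI requirement below:
+//
+//     p.VPERMT2B(XMM2, XMM1, XMM0)    // bytes gathered from {XMM0, XMM2}, indices in XMM1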
+// +// Mnemonic : VPERMT2B +// Supported forms : (6 forms) +// +// * VPERMT2B xmm, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMT2B m128, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMT2B ymm, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMT2B m256, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMT2B zmm, zmm, zmm{k}{z} [AVX512VBMI] +// * VPERMT2B m512, zmm, zmm{k}{z} [AVX512VBMI] +// +func (self *Program) VPERMT2B(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMT2B", 3, Operands { v0, v1, v2 }) + // VPERMT2B xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2B m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMT2B ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2B m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMT2B zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2B m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMT2B") + } + return p +} + +// VPERMT2D performs "Full Permute of Doublewords From Two Tables Overwriting a Table". 
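+//
+// Usage sketch (doubleword variant, same overwrite-a-table convention):
+//
+//     p.VPERMT2D(ZMM2, ZMM1, ZMM0)    // indices in ZMM1, result in ZMM0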
+// +// Mnemonic : VPERMT2D +// Supported forms : (6 forms) +// +// * VPERMT2D m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMT2D zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMT2D m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2D xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2D m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2D ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMT2D(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMT2D", 3, Operands { v0, v1, v2 }) + // VPERMT2D m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7e) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMT2D zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2D m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7e) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMT2D xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2D m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7e) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMT2D ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMT2D") + } + return p +} + +// VPERMT2PD performs "Full Permute of Double-Precision Floating-Point Values From Two Tables Overwriting a Table". 
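+//
+// Usage sketch (double-precision elements):
+//
+//     p.VPERMT2PD(ZMM2, ZMM1, ZMM0)    // ZMM0 <- doubles from {ZMM0, ZMM2} selected by ZMM1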
+// +// Mnemonic : VPERMT2PD +// Supported forms : (6 forms) +// +// * VPERMT2PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMT2PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMT2PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMT2PD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMT2PD", 3, Operands { v0, v1, v2 }) + // VPERMT2PD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7f) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMT2PD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2PD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7f) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMT2PD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2PD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7f) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMT2PD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMT2PD") + } + return p +} + +// VPERMT2PS performs "Full Permute of Single-Precision Floating-Point Values From Two Tables Overwriting a Table". 
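+//
+// Usage sketch (single-precision elements):
+//
+//     p.VPERMT2PS(ZMM2, ZMM1, ZMM0)    // ZMM0 <- singles from {ZMM0, ZMM2} selected by ZMM1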
+// +// Mnemonic : VPERMT2PS +// Supported forms : (6 forms) +// +// * VPERMT2PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMT2PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMT2PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMT2PS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMT2PS", 3, Operands { v0, v1, v2 }) + // VPERMT2PS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7f) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMT2PS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2PS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7f) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMT2PS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2PS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7f) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMT2PS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMT2PS") + } + return p +} + +// VPERMT2Q performs "Full Permute of Quadwords From Two Tables Overwriting a Table". 
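+//
+// Usage sketch using the 256-bit form, which additionally needs AVX512VL as
+// listed below:
+//
+//     p.VPERMT2Q(YMM2, YMM1, YMM0)    // quadword variant on YMM registers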
+// +// Mnemonic : VPERMT2Q +// Supported forms : (6 forms) +// +// * VPERMT2Q m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMT2Q zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMT2Q m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2Q xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2Q m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2Q ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMT2Q(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMT2Q", 3, Operands { v0, v1, v2 }) + // VPERMT2Q m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7e) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMT2Q zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2Q m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7e) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMT2Q xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2Q m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7e) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMT2Q ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMT2Q") + } + return p +} + +// VPERMT2W performs "Full Permute of Words From Two Tables Overwriting a Table". 
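+//
+// Example: a usage sketch, assuming a *Program p and the package's ZMM
+// register constants. Unlike the doubleword/quadword variants above, the
+// word-sized forms below require AVX512BW and take a plain m512/m256/m128
+// memory operand with no embedded broadcast:
+//
+//    p.VPERMT2W(ZMM2, ZMM1, ZMM0)    // zmm, zmm, zmm{k}{z} form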
+// +// Mnemonic : VPERMT2W +// Supported forms : (6 forms) +// +// * VPERMT2W zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPERMT2W m512, zmm, zmm{k}{z} [AVX512BW] +// * VPERMT2W xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPERMT2W m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPERMT2W ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPERMT2W m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPERMT2W(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMT2W", 3, Operands { v0, v1, v2 }) + // VPERMT2W zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2W m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMT2W xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2W m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMT2W ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2W m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMT2W") + } + return p +} + +// VPERMW performs "Permute Word Integers". 
+// +// Mnemonic : VPERMW +// Supported forms : (6 forms) +// +// * VPERMW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPERMW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPERMW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPERMW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPERMW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPERMW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPERMW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMW", 3, Operands { v0, v1, v2 }) + // VPERMW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x8d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x8d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x8d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x8d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x8d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x8d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMW") + } + return p +} + +// VPEXPANDD performs "Load Sparse Packed Doubleword Integer Values from Dense Memory/Register". 
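+//
+// Example: a usage sketch of the memory form, assuming a *Program p, the
+// package's RSI/ZMM0 register constants, and Ptr as its base+displacement
+// memory-operand helper. An expand-load is normally paired with a {k} mask,
+// which is omitted from this sketch:
+//
+//    p.VPEXPANDD(Ptr(RSI, 0), ZMM0)    // m512, zmm{k}{z}: expand doublewords from [RSI] into ZMM0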
+// +// Mnemonic : VPEXPANDD +// Supported forms : (6 forms) +// +// * VPEXPANDD zmm, zmm{k}{z} [AVX512F] +// * VPEXPANDD m512, zmm{k}{z} [AVX512F] +// * VPEXPANDD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPEXPANDD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPEXPANDD m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VPEXPANDD m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPEXPANDD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPEXPANDD", 2, Operands { v0, v1 }) + // VPEXPANDD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x89) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPEXPANDD m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x89) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPEXPANDD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x89) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPEXPANDD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x89) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPEXPANDD m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x89) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPEXPANDD m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x89) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPEXPANDD") + } + return p +} + +// VPEXPANDQ performs "Load Sparse Packed Quadword Integer Values from Dense Memory/Register". 
+// +// Mnemonic : VPEXPANDQ +// Supported forms : (6 forms) +// +// * VPEXPANDQ zmm, zmm{k}{z} [AVX512F] +// * VPEXPANDQ m512, zmm{k}{z} [AVX512F] +// * VPEXPANDQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPEXPANDQ ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPEXPANDQ m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VPEXPANDQ m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPEXPANDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPEXPANDQ", 2, Operands { v0, v1 }) + // VPEXPANDQ zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x89) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPEXPANDQ m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x89) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPEXPANDQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x89) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPEXPANDQ ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x89) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPEXPANDQ m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x89) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPEXPANDQ m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x89) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPEXPANDQ") + } + return p +} + +// VPEXTRB performs "Extract Byte". 
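+//
+// Example: a usage sketch, assuming a *Program p and the package's EAX/XMM1
+// register constants. As everywhere in this file the operand order is
+// source-first, so the immediate lane index comes first and the destination
+// register last:
+//
+//    p.VPEXTRB(3, XMM1, EAX)    // imm8, xmm, r32: extract byte lane 3 of XMM1 into EAX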
+// +// Mnemonic : VPEXTRB +// Supported forms : (4 forms) +// +// * VPEXTRB imm8, xmm, r32 [AVX] +// * VPEXTRB imm8, xmm, m8 [AVX] +// * VPEXTRB imm8, xmm, r32 [AVX512BW] +// * VPEXTRB imm8, xmm, m8 [AVX512BW] +// +func (self *Program) VPEXTRB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPEXTRB", 3, Operands { v0, v1, v2 }) + // VPEXTRB imm8, xmm, r32 + if isImm8(v0) && isXMM(v1) && isReg32(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x79) + m.emit(0x14) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRB imm8, xmm, m8 + if isImm8(v0) && isXMM(v1) && isM8(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0) + m.emit(0x14) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRB imm8, xmm, r32 + if isImm8(v0) && isEVEXXMM(v1) && isReg32(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit(0x08) + m.emit(0x14) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRB imm8, xmm, m8 + if isImm8(v0) && isEVEXXMM(v1) && isM8(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0) + m.emit(0x14) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPEXTRB") + } + return p +} + +// VPEXTRD performs "Extract Doubleword". 
+// +// Mnemonic : VPEXTRD +// Supported forms : (4 forms) +// +// * VPEXTRD imm8, xmm, r32 [AVX] +// * VPEXTRD imm8, xmm, m32 [AVX] +// * VPEXTRD imm8, xmm, r32 [AVX512DQ] +// * VPEXTRD imm8, xmm, m32 [AVX512DQ] +// +func (self *Program) VPEXTRD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPEXTRD", 3, Operands { v0, v1, v2 }) + // VPEXTRD imm8, xmm, r32 + if isImm8(v0) && isXMM(v1) && isReg32(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x79) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRD imm8, xmm, m32 + if isImm8(v0) && isXMM(v1) && isM32(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRD imm8, xmm, r32 + if isImm8(v0) && isEVEXXMM(v1) && isReg32(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit(0x08) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRD imm8, xmm, m32 + if isImm8(v0) && isEVEXXMM(v1) && isM32(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[2]), 4) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPEXTRD") + } + return p +} + +// VPEXTRQ performs "Extract Quadword". 
+// +// Mnemonic : VPEXTRQ +// Supported forms : (4 forms) +// +// * VPEXTRQ imm8, xmm, r64 [AVX] +// * VPEXTRQ imm8, xmm, m64 [AVX] +// * VPEXTRQ imm8, xmm, r64 [AVX512DQ] +// * VPEXTRQ imm8, xmm, m64 [AVX512DQ] +// +func (self *Program) VPEXTRQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPEXTRQ", 3, Operands { v0, v1, v2 }) + // VPEXTRQ imm8, xmm, r64 + if isImm8(v0) && isXMM(v1) && isReg64(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0xf9) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRQ imm8, xmm, m64 + if isImm8(v0) && isXMM(v1) && isM64(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[1]), addr(v[2]), 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRQ imm8, xmm, r64 + if isImm8(v0) && isEVEXXMM(v1) && isReg64(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit(0x08) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRQ imm8, xmm, m64 + if isImm8(v0) && isEVEXXMM(v1) && isM64(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[2]), 8) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPEXTRQ") + } + return p +} + +// VPEXTRW performs "Extract Word". 
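+//
+// Example: a usage sketch, assuming a *Program p and the package's ECX/XMM2
+// register constants. Note that the register forms below each register two
+// alternative encodings (the short legacy-style 0F C5 form and the 0F3A 15
+// form), leaving the encoder free to choose between them:
+//
+//    p.VPEXTRW(0, XMM2, ECX)    // imm8, xmm, r32: extract word lane 0 of XMM2 into ECX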
+// +// Mnemonic : VPEXTRW +// Supported forms : (4 forms) +// +// * VPEXTRW imm8, xmm, r32 [AVX] +// * VPEXTRW imm8, xmm, m16 [AVX] +// * VPEXTRW imm8, xmm, r32 [AVX512BW] +// * VPEXTRW imm8, xmm, m16 [AVX512BW] +// +func (self *Program) VPEXTRW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPEXTRW", 3, Operands { v0, v1, v2 }) + // VPEXTRW imm8, xmm, r32 + if isImm8(v0) && isXMM(v1) && isReg32(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[1], 0) + m.emit(0xc5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x79) + m.emit(0x15) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRW imm8, xmm, m16 + if isImm8(v0) && isXMM(v1) && isM16(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0) + m.emit(0x15) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRW imm8, xmm, r32 + if isImm8(v0) && isEVEXXMM(v1) && isReg32(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit(0x08) + m.emit(0x15) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit(0x08) + m.emit(0xc5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRW imm8, xmm, m16 + if isImm8(v0) && isEVEXXMM(v1) && isM16(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0) + m.emit(0x15) + m.mrsd(lcode(v[1]), addr(v[2]), 2) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPEXTRW") + } + return p +} + +// VPGATHERDD performs "Gather Packed Doubleword Values Using Signed Doubleword Indices". 
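+//
+// Example: a usage sketch of the AVX2 form, assuming a *Program p and Sib as
+// the package's scaled-index memory helper, so Sib(RDI, XMM2, 4, 0) stands
+// for the vm32x operand [RDI + XMM2*4]. The AVX-512 forms below instead take
+// two operands, with a mandatory {k} mask on the destination:
+//
+//    p.VPGATHERDD(XMM1, Sib(RDI, XMM2, 4, 0), XMM0)    // xmm, vm32x, xmm: XMM1 is the per-lane write mask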
+// +// Mnemonic : VPGATHERDD +// Supported forms : (5 forms) +// +// * VPGATHERDD xmm, vm32x, xmm [AVX2] +// * VPGATHERDD ymm, vm32y, ymm [AVX2] +// * VPGATHERDD vm32z, zmm{k} [AVX512F] +// * VPGATHERDD vm32x, xmm{k} [AVX512F,AVX512VL] +// * VPGATHERDD vm32y, ymm{k} [AVX512F,AVX512VL] +// +func (self *Program) VPGATHERDD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VPGATHERDD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VPGATHERDD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VPGATHERDD takes 2 or 3 operands") + } + // VPGATHERDD xmm, vm32x, xmm + if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x90) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VPGATHERDD ymm, vm32y, ymm + if len(vv) == 1 && isYMM(v0) && isVMY(v1) && isYMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x90) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VPGATHERDD vm32z, zmm{k} + if len(vv) == 0 && isVMZ(v0) && isZMMk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPGATHERDD vm32x, xmm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPGATHERDD vm32y, ymm{k} + if len(vv) == 0 && isEVEXVMY(v0) && isYMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPGATHERDD") + } + return p +} + +// VPGATHERDQ performs "Gather Packed Quadword Values Using Signed Doubleword Indices". 
+// +// Mnemonic : VPGATHERDQ +// Supported forms : (5 forms) +// +// * VPGATHERDQ xmm, vm32x, xmm [AVX2] +// * VPGATHERDQ ymm, vm32x, ymm [AVX2] +// * VPGATHERDQ vm32y, zmm{k} [AVX512F] +// * VPGATHERDQ vm32x, xmm{k} [AVX512F,AVX512VL] +// * VPGATHERDQ vm32x, ymm{k} [AVX512F,AVX512VL] +// +func (self *Program) VPGATHERDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VPGATHERDQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VPGATHERDQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VPGATHERDQ takes 2 or 3 operands") + } + // VPGATHERDQ xmm, vm32x, xmm + if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x90) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VPGATHERDQ ymm, vm32x, ymm + if len(vv) == 1 && isYMM(v0) && isVMX(v1) && isYMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x90) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VPGATHERDQ vm32y, zmm{k} + if len(vv) == 0 && isEVEXVMY(v0) && isZMMk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPGATHERDQ vm32x, xmm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPGATHERDQ vm32x, ymm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isYMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPGATHERDQ") + } + return p +} + +// VPGATHERQD performs "Gather Packed Doubleword Values Using Signed Quadword Indices". 
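+//
+// Note the width asymmetry in the forms below: VPGATHERQD fetches 32-bit
+// values through 64-bit indices, so each destination is half the width of
+// its index vector (the vm64z form writes a ymm, not a zmm). A sketch of the
+// AVX2 vm64y form, with Sib assumed as the scaled-index memory helper:
+//
+//    p.VPGATHERQD(XMM1, Sib(RSI, YMM3, 4, 0), XMM0)    // xmm, vm64y, xmm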
+// +// Mnemonic : VPGATHERQD +// Supported forms : (5 forms) +// +// * VPGATHERQD xmm, vm64x, xmm [AVX2] +// * VPGATHERQD xmm, vm64y, xmm [AVX2] +// * VPGATHERQD vm64z, ymm{k} [AVX512F] +// * VPGATHERQD vm64x, xmm{k} [AVX512F,AVX512VL] +// * VPGATHERQD vm64y, xmm{k} [AVX512F,AVX512VL] +// +func (self *Program) VPGATHERQD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VPGATHERQD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VPGATHERQD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VPGATHERQD takes 2 or 3 operands") + } + // VPGATHERQD xmm, vm64x, xmm + if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x91) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VPGATHERQD xmm, vm64y, xmm + if len(vv) == 1 && isXMM(v0) && isVMY(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x91) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VPGATHERQD vm64z, ymm{k} + if len(vv) == 0 && isVMZ(v0) && isYMMk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x91) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPGATHERQD vm64x, xmm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x91) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPGATHERQD vm64y, xmm{k} + if len(vv) == 0 && isEVEXVMY(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x91) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPGATHERQD") + } + return p +} + +// VPGATHERQQ performs "Gather Packed Quadword Values Using Signed Quadword Indices". 
+// +// Mnemonic : VPGATHERQQ +// Supported forms : (5 forms) +// +// * VPGATHERQQ xmm, vm64x, xmm [AVX2] +// * VPGATHERQQ ymm, vm64y, ymm [AVX2] +// * VPGATHERQQ vm64z, zmm{k} [AVX512F] +// * VPGATHERQQ vm64x, xmm{k} [AVX512F,AVX512VL] +// * VPGATHERQQ vm64y, ymm{k} [AVX512F,AVX512VL] +// +func (self *Program) VPGATHERQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VPGATHERQQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VPGATHERQQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VPGATHERQQ takes 2 or 3 operands") + } + // VPGATHERQQ xmm, vm64x, xmm + if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x91) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VPGATHERQQ ymm, vm64y, ymm + if len(vv) == 1 && isYMM(v0) && isVMY(v1) && isYMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x91) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VPGATHERQQ vm64z, zmm{k} + if len(vv) == 0 && isVMZ(v0) && isZMMk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x91) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPGATHERQQ vm64x, xmm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x91) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPGATHERQQ vm64y, ymm{k} + if len(vv) == 0 && isEVEXVMY(v0) && isYMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x91) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPGATHERQQ") + } + return p +} + +// VPHADDBD performs "Packed Horizontal Add Signed Byte to Signed Doubleword". +// +// Mnemonic : VPHADDBD +// Supported forms : (2 forms) +// +// * VPHADDBD xmm, xmm [XOP] +// * VPHADDBD m128, xmm [XOP] +// +func (self *Program) VPHADDBD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDBD", 2, Operands { v0, v1 }) + // VPHADDBD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDBD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xc2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDBD") + } + return p +} + +// VPHADDBQ performs "Packed Horizontal Add Signed Byte to Signed Quadword". 
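+//
+// Example: the XOP horizontal adds in this family are two-operand, AMD-only
+// instructions (note ISA_XOP and DomainAMDSpecific in the encoders). A usage
+// sketch, assuming a *Program p and the package's XMM register constants:
+//
+//    p.VPHADDBQ(XMM1, XMM0)    // xmm, xmm: horizontally sum signed bytes of XMM1 into quadwords in XMM0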
+// +// Mnemonic : VPHADDBQ +// Supported forms : (2 forms) +// +// * VPHADDBQ xmm, xmm [XOP] +// * VPHADDBQ m128, xmm [XOP] +// +func (self *Program) VPHADDBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDBQ", 2, Operands { v0, v1 }) + // VPHADDBQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xc3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDBQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xc3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDBQ") + } + return p +} + +// VPHADDBW performs "Packed Horizontal Add Signed Byte to Signed Word". +// +// Mnemonic : VPHADDBW +// Supported forms : (2 forms) +// +// * VPHADDBW xmm, xmm [XOP] +// * VPHADDBW m128, xmm [XOP] +// +func (self *Program) VPHADDBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDBW", 2, Operands { v0, v1 }) + // VPHADDBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xc1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDBW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xc1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDBW") + } + return p +} + +// VPHADDD performs "Packed Horizontal Add Doubleword Integer". 
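+//
+// Example: a usage sketch of the AVX2 form, assuming a *Program p and the
+// package's YMM register constants; operands are source-first, so the
+// destination comes last:
+//
+//    p.VPHADDD(YMM2, YMM1, YMM0)    // ymm, ymm, ymm: YMM0 receives pairwise sums drawn from YMM1 and YMM2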
+// +// Mnemonic : VPHADDD +// Supported forms : (4 forms) +// +// * VPHADDD xmm, xmm, xmm [AVX] +// * VPHADDD m128, xmm, xmm [AVX] +// * VPHADDD ymm, ymm, ymm [AVX2] +// * VPHADDD m256, ymm, ymm [AVX2] +// +func (self *Program) VPHADDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPHADDD", 3, Operands { v0, v1, v2 }) + // VPHADDD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x02) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHADDD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x02) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPHADDD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x02) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHADDD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x02) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDD") + } + return p +} + +// VPHADDDQ performs "Packed Horizontal Add Signed Doubleword to Signed Quadword". +// +// Mnemonic : VPHADDDQ +// Supported forms : (2 forms) +// +// * VPHADDDQ xmm, xmm [XOP] +// * VPHADDDQ m128, xmm [XOP] +// +func (self *Program) VPHADDDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDDQ", 2, Operands { v0, v1 }) + // VPHADDDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xcb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xcb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDDQ") + } + return p +} + +// VPHADDSW performs "Packed Horizontal Add Signed Word Integers with Signed Saturation". 
+// +// Mnemonic : VPHADDSW +// Supported forms : (4 forms) +// +// * VPHADDSW xmm, xmm, xmm [AVX] +// * VPHADDSW m128, xmm, xmm [AVX] +// * VPHADDSW ymm, ymm, ymm [AVX2] +// * VPHADDSW m256, ymm, ymm [AVX2] +// +func (self *Program) VPHADDSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPHADDSW", 3, Operands { v0, v1, v2 }) + // VPHADDSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x03) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHADDSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x03) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPHADDSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x03) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHADDSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x03) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDSW") + } + return p +} + +// VPHADDUBD performs "Packed Horizontal Add Unsigned Byte to Doubleword". +// +// Mnemonic : VPHADDUBD +// Supported forms : (2 forms) +// +// * VPHADDUBD xmm, xmm [XOP] +// * VPHADDUBD m128, xmm [XOP] +// +func (self *Program) VPHADDUBD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDUBD", 2, Operands { v0, v1 }) + // VPHADDUBD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDUBD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xd2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDUBD") + } + return p +} + +// VPHADDUBQ performs "Packed Horizontal Add Unsigned Byte to Quadword". 
+// +// Mnemonic : VPHADDUBQ +// Supported forms : (2 forms) +// +// * VPHADDUBQ xmm, xmm [XOP] +// * VPHADDUBQ m128, xmm [XOP] +// +func (self *Program) VPHADDUBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDUBQ", 2, Operands { v0, v1 }) + // VPHADDUBQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDUBQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xd3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDUBQ") + } + return p +} + +// VPHADDUBW performs "Packed Horizontal Add Unsigned Byte to Word". +// +// Mnemonic : VPHADDUBW +// Supported forms : (2 forms) +// +// * VPHADDUBW xmm, xmm [XOP] +// * VPHADDUBW m128, xmm [XOP] +// +func (self *Program) VPHADDUBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDUBW", 2, Operands { v0, v1 }) + // VPHADDUBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDUBW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xd1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDUBW") + } + return p +} + +// VPHADDUDQ performs "Packed Horizontal Add Unsigned Doubleword to Quadword". +// +// Mnemonic : VPHADDUDQ +// Supported forms : (2 forms) +// +// * VPHADDUDQ xmm, xmm [XOP] +// * VPHADDUDQ m128, xmm [XOP] +// +func (self *Program) VPHADDUDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDUDQ", 2, Operands { v0, v1 }) + // VPHADDUDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDUDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xdb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDUDQ") + } + return p +} + +// VPHADDUWD performs "Packed Horizontal Add Unsigned Word to Doubleword". 
+// +// Mnemonic : VPHADDUWD +// Supported forms : (2 forms) +// +// * VPHADDUWD xmm, xmm [XOP] +// * VPHADDUWD m128, xmm [XOP] +// +func (self *Program) VPHADDUWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDUWD", 2, Operands { v0, v1 }) + // VPHADDUWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xd6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDUWD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xd6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDUWD") + } + return p +} + +// VPHADDUWQ performs "Packed Horizontal Add Unsigned Word to Quadword". +// +// Mnemonic : VPHADDUWQ +// Supported forms : (2 forms) +// +// * VPHADDUWQ xmm, xmm [XOP] +// * VPHADDUWQ m128, xmm [XOP] +// +func (self *Program) VPHADDUWQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDUWQ", 2, Operands { v0, v1 }) + // VPHADDUWQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xd7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDUWQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xd7) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDUWQ") + } + return p +} + +// VPHADDW performs "Packed Horizontal Add Word Integers". 
+// +// Mnemonic : VPHADDW +// Supported forms : (4 forms) +// +// * VPHADDW xmm, xmm, xmm [AVX] +// * VPHADDW m128, xmm, xmm [AVX] +// * VPHADDW ymm, ymm, ymm [AVX2] +// * VPHADDW m256, ymm, ymm [AVX2] +// +func (self *Program) VPHADDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPHADDW", 3, Operands { v0, v1, v2 }) + // VPHADDW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHADDW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPHADDW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHADDW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDW") + } + return p +} + +// VPHADDWD performs "Packed Horizontal Add Signed Word to Signed Doubleword". +// +// Mnemonic : VPHADDWD +// Supported forms : (2 forms) +// +// * VPHADDWD xmm, xmm [XOP] +// * VPHADDWD m128, xmm [XOP] +// +func (self *Program) VPHADDWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDWD", 2, Operands { v0, v1 }) + // VPHADDWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDWD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xc6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDWD") + } + return p +} + +// VPHADDWQ performs "Packed Horizontal Add Signed Word to Signed Quadword". 
+// +// Mnemonic : VPHADDWQ +// Supported forms : (2 forms) +// +// * VPHADDWQ xmm, xmm [XOP] +// * VPHADDWQ m128, xmm [XOP] +// +func (self *Program) VPHADDWQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDWQ", 2, Operands { v0, v1 }) + // VPHADDWQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xc7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDWQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xc7) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDWQ") + } + return p +} + +// VPHMINPOSUW performs "Packed Horizontal Minimum of Unsigned Word Integers". +// +// Mnemonic : VPHMINPOSUW +// Supported forms : (2 forms) +// +// * VPHMINPOSUW xmm, xmm [AVX] +// * VPHMINPOSUW m128, xmm [AVX] +// +func (self *Program) VPHMINPOSUW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHMINPOSUW", 2, Operands { v0, v1 }) + // VPHMINPOSUW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x41) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHMINPOSUW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x41) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHMINPOSUW") + } + return p +} + +// VPHSUBBW performs "Packed Horizontal Subtract Signed Byte to Signed Word". +// +// Mnemonic : VPHSUBBW +// Supported forms : (2 forms) +// +// * VPHSUBBW xmm, xmm [XOP] +// * VPHSUBBW m128, xmm [XOP] +// +func (self *Program) VPHSUBBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHSUBBW", 2, Operands { v0, v1 }) + // VPHSUBBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xe1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHSUBBW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHSUBBW") + } + return p +} + +// VPHSUBD performs "Packed Horizontal Subtract Doubleword Integers". 
+// +// Mnemonic : VPHSUBD +// Supported forms : (4 forms) +// +// * VPHSUBD xmm, xmm, xmm [AVX] +// * VPHSUBD m128, xmm, xmm [AVX] +// * VPHSUBD ymm, ymm, ymm [AVX2] +// * VPHSUBD m256, ymm, ymm [AVX2] +// +func (self *Program) VPHSUBD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPHSUBD", 3, Operands { v0, v1, v2 }) + // VPHSUBD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x06) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHSUBD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x06) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPHSUBD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x06) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHSUBD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x06) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHSUBD") + } + return p +} + +// VPHSUBDQ performs "Packed Horizontal Subtract Signed Doubleword to Signed Quadword". +// +// Mnemonic : VPHSUBDQ +// Supported forms : (2 forms) +// +// * VPHSUBDQ xmm, xmm [XOP] +// * VPHSUBDQ m128, xmm [XOP] +// +func (self *Program) VPHSUBDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHSUBDQ", 2, Operands { v0, v1 }) + // VPHSUBDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xe3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHSUBDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHSUBDQ") + } + return p +} + +// VPHSUBSW performs "Packed Horizontal Subtract Signed Word Integers with Signed Saturation". 
+// +// Mnemonic : VPHSUBSW +// Supported forms : (4 forms) +// +// * VPHSUBSW xmm, xmm, xmm [AVX] +// * VPHSUBSW m128, xmm, xmm [AVX] +// * VPHSUBSW ymm, ymm, ymm [AVX2] +// * VPHSUBSW m256, ymm, ymm [AVX2] +// +func (self *Program) VPHSUBSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPHSUBSW", 3, Operands { v0, v1, v2 }) + // VPHSUBSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x07) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHSUBSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x07) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPHSUBSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x07) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHSUBSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x07) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHSUBSW") + } + return p +} + +// VPHSUBW performs "Packed Horizontal Subtract Word Integers". 
+// +// Mnemonic : VPHSUBW +// Supported forms : (4 forms) +// +// * VPHSUBW xmm, xmm, xmm [AVX] +// * VPHSUBW m128, xmm, xmm [AVX] +// * VPHSUBW ymm, ymm, ymm [AVX2] +// * VPHSUBW m256, ymm, ymm [AVX2] +// +func (self *Program) VPHSUBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPHSUBW", 3, Operands { v0, v1, v2 }) + // VPHSUBW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x05) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHSUBW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x05) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPHSUBW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x05) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHSUBW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x05) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHSUBW") + } + return p +} + +// VPHSUBWD performs "Packed Horizontal Subtract Signed Word to Signed Doubleword". +// +// Mnemonic : VPHSUBWD +// Supported forms : (2 forms) +// +// * VPHSUBWD xmm, xmm [XOP] +// * VPHSUBWD m128, xmm [XOP] +// +func (self *Program) VPHSUBWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHSUBWD", 2, Operands { v0, v1 }) + // VPHSUBWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHSUBWD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHSUBWD") + } + return p +} + +// VPINSRB performs "Insert Byte". 
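+//
+// Hedged usage sketch (p, EAX, and the XMM register values are assumed names):
+// the immediate selects which byte lane of the source register is replaced:
+//
+//     p.VPINSRB(3, EAX, XMM1, XMM2)  // XMM2 = XMM1 with byte lane 3 set to the low byte of EAX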
+// +// Mnemonic : VPINSRB +// Supported forms : (4 forms) +// +// * VPINSRB imm8, r32, xmm, xmm [AVX] +// * VPINSRB imm8, m8, xmm, xmm [AVX] +// * VPINSRB imm8, r32, xmm, xmm [AVX512BW] +// * VPINSRB imm8, m8, xmm, xmm [AVX512BW] +// +func (self *Program) VPINSRB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPINSRB", 4, Operands { v0, v1, v2, v3 }) + // VPINSRB imm8, r32, xmm, xmm + if isImm8(v0) && isReg32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x20) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRB imm8, m8, xmm, xmm + if isImm8(v0) && isM8(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x20) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRB imm8, r32, xmm, xmm + if isImm8(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00) + m.emit(0x20) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRB imm8, m8, xmm, xmm + if isImm8(v0) && isM8(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x20) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPINSRB") + } + return p +} + +// VPINSRD performs "Insert Doubleword". 
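+//
+// Sketch under the same assumptions as the examples above: imm8 picks one of the
+// four doubleword lanes:
+//
+//     p.VPINSRD(1, EAX, XMM1, XMM2)  // XMM2 = XMM1 with dword lane 1 set to EAX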
+// +// Mnemonic : VPINSRD +// Supported forms : (4 forms) +// +// * VPINSRD imm8, r32, xmm, xmm [AVX] +// * VPINSRD imm8, m32, xmm, xmm [AVX] +// * VPINSRD imm8, r32, xmm, xmm [AVX512DQ] +// * VPINSRD imm8, m32, xmm, xmm [AVX512DQ] +// +func (self *Program) VPINSRD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPINSRD", 4, Operands { v0, v1, v2, v3 }) + // VPINSRD imm8, r32, xmm, xmm + if isImm8(v0) && isReg32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x22) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRD imm8, m32, xmm, xmm + if isImm8(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x22) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRD imm8, r32, xmm, xmm + if isImm8(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00) + m.emit(0x22) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRD imm8, m32, xmm, xmm + if isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x22) + m.mrsd(lcode(v[3]), addr(v[1]), 4) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPINSRD") + } + return p +} + +// VPINSRQ performs "Insert Quadword". 
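+//
+// Sketch (assumed names, plus a 64-bit register value such as RAX): the quadword
+// form takes a 64-bit source, and imm8 selects lane 0 or 1:
+//
+//     p.VPINSRQ(0, RAX, XMM1, XMM2)  // XMM2 = XMM1 with qword lane 0 set to RAX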
+// +// Mnemonic : VPINSRQ +// Supported forms : (4 forms) +// +// * VPINSRQ imm8, r64, xmm, xmm [AVX] +// * VPINSRQ imm8, m64, xmm, xmm [AVX] +// * VPINSRQ imm8, r64, xmm, xmm [AVX512DQ] +// * VPINSRQ imm8, m64, xmm, xmm [AVX512DQ] +// +func (self *Program) VPINSRQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPINSRQ", 4, Operands { v0, v1, v2, v3 }) + // VPINSRQ imm8, r64, xmm, xmm + if isImm8(v0) && isReg64(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x22) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRQ imm8, m64, xmm, xmm + if isImm8(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x22) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRQ imm8, r64, xmm, xmm + if isImm8(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00) + m.emit(0x22) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRQ imm8, m64, xmm, xmm + if isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x22) + m.mrsd(lcode(v[3]), addr(v[1]), 8) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPINSRQ") + } + return p +} + +// VPINSRW performs "Insert Word". 
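+//
+// Sketch (assumed names): imm8 selects one of the eight word lanes, and the low
+// 16 bits of the general-purpose source are inserted:
+//
+//     p.VPINSRW(7, EAX, XMM1, XMM2)  // XMM2 = XMM1 with word lane 7 set to AX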
+// +// Mnemonic : VPINSRW +// Supported forms : (4 forms) +// +// * VPINSRW imm8, r32, xmm, xmm [AVX] +// * VPINSRW imm8, m16, xmm, xmm [AVX] +// * VPINSRW imm8, r32, xmm, xmm [AVX512BW] +// * VPINSRW imm8, m16, xmm, xmm [AVX512BW] +// +func (self *Program) VPINSRW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPINSRW", 4, Operands { v0, v1, v2, v3 }) + // VPINSRW imm8, r32, xmm, xmm + if isImm8(v0) && isReg32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRW imm8, m16, xmm, xmm + if isImm8(v0) && isM16(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc4) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRW imm8, r32, xmm, xmm + if isImm8(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRW imm8, m16, xmm, xmm + if isImm8(v0) && isM16(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0xc4) + m.mrsd(lcode(v[3]), addr(v[1]), 2) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPINSRW") + } + return p +} + +// VPLZCNTD performs "Count the Number of Leading Zero Bits for Packed Doubleword Values". 
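+//
+// Sketch, assuming p and the ZMM register values: every form listed below is
+// EVEX-encoded, so the plain 512-bit register form requires ISA_AVX512CD:
+//
+//     p.VPLZCNTD(ZMM1, ZMM2)         // per-dword leading-zero counts of ZMM1 into ZMM2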
+// +// Mnemonic : VPLZCNTD +// Supported forms : (6 forms) +// +// * VPLZCNTD m128/m32bcst, xmm{k}{z} [AVX512CD,AVX512VL] +// * VPLZCNTD m256/m32bcst, ymm{k}{z} [AVX512CD,AVX512VL] +// * VPLZCNTD m512/m32bcst, zmm{k}{z} [AVX512CD] +// * VPLZCNTD xmm, xmm{k}{z} [AVX512CD,AVX512VL] +// * VPLZCNTD ymm, ymm{k}{z} [AVX512CD,AVX512VL] +// * VPLZCNTD zmm, zmm{k}{z} [AVX512CD] +// +func (self *Program) VPLZCNTD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPLZCNTD", 2, Operands { v0, v1 }) + // VPLZCNTD m128/m32bcst, xmm{k}{z} + if isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPLZCNTD m256/m32bcst, ymm{k}{z} + if isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPLZCNTD m512/m32bcst, zmm{k}{z} + if isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPLZCNTD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPLZCNTD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPLZCNTD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPLZCNTD") + } + return p +} + +// VPLZCNTQ performs "Count the Number of Leading Zero Bits for Packed Quadword Values". 
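+//
+// Sketch (assumed names as above): the 128-bit form additionally requires
+// AVX512VL, per the form list below:
+//
+//     p.VPLZCNTQ(XMM1, XMM2)         // per-qword leading-zero counts; the xmm form also needs AVX512VL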
+// +// Mnemonic : VPLZCNTQ +// Supported forms : (6 forms) +// +// * VPLZCNTQ m128/m64bcst, xmm{k}{z} [AVX512CD,AVX512VL] +// * VPLZCNTQ m256/m64bcst, ymm{k}{z} [AVX512CD,AVX512VL] +// * VPLZCNTQ m512/m64bcst, zmm{k}{z} [AVX512CD] +// * VPLZCNTQ xmm, xmm{k}{z} [AVX512CD,AVX512VL] +// * VPLZCNTQ ymm, ymm{k}{z} [AVX512CD,AVX512VL] +// * VPLZCNTQ zmm, zmm{k}{z} [AVX512CD] +// +func (self *Program) VPLZCNTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPLZCNTQ", 2, Operands { v0, v1 }) + // VPLZCNTQ m128/m64bcst, xmm{k}{z} + if isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPLZCNTQ m256/m64bcst, ymm{k}{z} + if isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPLZCNTQ m512/m64bcst, zmm{k}{z} + if isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPLZCNTQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPLZCNTQ ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPLZCNTQ zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPLZCNTQ") + } + return p +} + +// VPMACSDD performs "Packed Multiply Accumulate Signed Doubleword to Signed Doubleword". 
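+//
+// Sketch for the XOP multiply-accumulate family (assumed names; the same calling
+// pattern applies to the other VPMACS*/VPMADCS* variants below): judging from the
+// encoding, the first operand is the addend and the last is the destination:
+//
+//     p.VPMACSDD(XMM3, XMM2, XMM1, XMM0)   // XMM0 = XMM1 * XMM2 (per dword) + XMM3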
+// +// Mnemonic : VPMACSDD +// Supported forms : (2 forms) +// +// * VPMACSDD xmm, xmm, xmm, xmm [XOP] +// * VPMACSDD xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSDD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSDD", 4, Operands { v0, v1, v2, v3 }) + // VPMACSDD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSDD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x9e) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSDD") + } + return p +} + +// VPMACSDQH performs "Packed Multiply Accumulate Signed High Doubleword to Signed Quadword". +// +// Mnemonic : VPMACSDQH +// Supported forms : (2 forms) +// +// * VPMACSDQH xmm, xmm, xmm, xmm [XOP] +// * VPMACSDQH xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSDQH(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSDQH", 4, Operands { v0, v1, v2, v3 }) + // VPMACSDQH xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSDQH xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x9f) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSDQH") + } + return p +} + +// VPMACSDQL performs "Packed Multiply Accumulate Signed Low Doubleword to Signed Quadword". 
+// +// Mnemonic : VPMACSDQL +// Supported forms : (2 forms) +// +// * VPMACSDQL xmm, xmm, xmm, xmm [XOP] +// * VPMACSDQL xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSDQL(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSDQL", 4, Operands { v0, v1, v2, v3 }) + // VPMACSDQL xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x97) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSDQL xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x97) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSDQL") + } + return p +} + +// VPMACSSDD performs "Packed Multiply Accumulate with Saturation Signed Doubleword to Signed Doubleword". +// +// Mnemonic : VPMACSSDD +// Supported forms : (2 forms) +// +// * VPMACSSDD xmm, xmm, xmm, xmm [XOP] +// * VPMACSSDD xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSSDD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSSDD", 4, Operands { v0, v1, v2, v3 }) + // VPMACSSDD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x8e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSSDD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x8e) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSSDD") + } + return p +} + +// VPMACSSDQH performs "Packed Multiply Accumulate with Saturation Signed High Doubleword to Signed Quadword". 
+// +// Mnemonic : VPMACSSDQH +// Supported forms : (2 forms) +// +// * VPMACSSDQH xmm, xmm, xmm, xmm [XOP] +// * VPMACSSDQH xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSSDQH(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSSDQH", 4, Operands { v0, v1, v2, v3 }) + // VPMACSSDQH xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x8f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSSDQH xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x8f) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSSDQH") + } + return p +} + +// VPMACSSDQL performs "Packed Multiply Accumulate with Saturation Signed Low Doubleword to Signed Quadword". +// +// Mnemonic : VPMACSSDQL +// Supported forms : (2 forms) +// +// * VPMACSSDQL xmm, xmm, xmm, xmm [XOP] +// * VPMACSSDQL xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSSDQL(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSSDQL", 4, Operands { v0, v1, v2, v3 }) + // VPMACSSDQL xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x87) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSSDQL xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x87) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSSDQL") + } + return p +} + +// VPMACSSWD performs "Packed Multiply Accumulate with Saturation Signed Word to Signed Doubleword". 
+// +// Mnemonic : VPMACSSWD +// Supported forms : (2 forms) +// +// * VPMACSSWD xmm, xmm, xmm, xmm [XOP] +// * VPMACSSWD xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSSWD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSSWD", 4, Operands { v0, v1, v2, v3 }) + // VPMACSSWD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x86) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSSWD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x86) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSSWD") + } + return p +} + +// VPMACSSWW performs "Packed Multiply Accumulate with Saturation Signed Word to Signed Word". +// +// Mnemonic : VPMACSSWW +// Supported forms : (2 forms) +// +// * VPMACSSWW xmm, xmm, xmm, xmm [XOP] +// * VPMACSSWW xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSSWW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSSWW", 4, Operands { v0, v1, v2, v3 }) + // VPMACSSWW xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x85) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSSWW xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x85) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSSWW") + } + return p +} + +// VPMACSWD performs "Packed Multiply Accumulate Signed Word to Signed Doubleword". 
+// +// Mnemonic : VPMACSWD +// Supported forms : (2 forms) +// +// * VPMACSWD xmm, xmm, xmm, xmm [XOP] +// * VPMACSWD xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSWD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSWD", 4, Operands { v0, v1, v2, v3 }) + // VPMACSWD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x96) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSWD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x96) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSWD") + } + return p +} + +// VPMACSWW performs "Packed Multiply Accumulate Signed Word to Signed Word". +// +// Mnemonic : VPMACSWW +// Supported forms : (2 forms) +// +// * VPMACSWW xmm, xmm, xmm, xmm [XOP] +// * VPMACSWW xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSWW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSWW", 4, Operands { v0, v1, v2, v3 }) + // VPMACSWW xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x95) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSWW xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x95) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSWW") + } + return p +} + +// VPMADCSSWD performs "Packed Multiply Add Accumulate with Saturation Signed Word to Signed Doubleword". 
+// +// Mnemonic : VPMADCSSWD +// Supported forms : (2 forms) +// +// * VPMADCSSWD xmm, xmm, xmm, xmm [XOP] +// * VPMADCSSWD xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMADCSSWD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMADCSSWD", 4, Operands { v0, v1, v2, v3 }) + // VPMADCSSWD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMADCSSWD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xa6) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMADCSSWD") + } + return p +} + +// VPMADCSWD performs "Packed Multiply Add Accumulate Signed Word to Signed Doubleword". +// +// Mnemonic : VPMADCSWD +// Supported forms : (2 forms) +// +// * VPMADCSWD xmm, xmm, xmm, xmm [XOP] +// * VPMADCSWD xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMADCSWD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMADCSWD", 4, Operands { v0, v1, v2, v3 }) + // VPMADCSWD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMADCSWD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xb6) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMADCSWD") + } + return p +} + +// VPMADD52HUQ performs "Packed Multiply of Unsigned 52-bit Unsigned Integers and Add High 52-bit Products to Quadword Accumulators". 
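+//
+// Sketch (assumed names): the destination doubles as the accumulator, so the
+// last operand is read as well as written:
+//
+//     p.VPMADD52HUQ(ZMM1, ZMM2, ZMM3)  // ZMM3 += high 52 bits of the per-qword 52-bit products of ZMM1 and ZMM2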
+// +// Mnemonic : VPMADD52HUQ +// Supported forms : (6 forms) +// +// * VPMADD52HUQ m128/m64bcst, xmm, xmm{k}{z} [AVX512IFMA,AVX512VL] +// * VPMADD52HUQ xmm, xmm, xmm{k}{z} [AVX512IFMA,AVX512VL] +// * VPMADD52HUQ m256/m64bcst, ymm, ymm{k}{z} [AVX512IFMA,AVX512VL] +// * VPMADD52HUQ ymm, ymm, ymm{k}{z} [AVX512IFMA,AVX512VL] +// * VPMADD52HUQ m512/m64bcst, zmm, zmm{k}{z} [AVX512IFMA] +// * VPMADD52HUQ zmm, zmm, zmm{k}{z} [AVX512IFMA] +// +func (self *Program) VPMADD52HUQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMADD52HUQ", 3, Operands { v0, v1, v2 }) + // VPMADD52HUQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512IFMA | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb5) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMADD52HUQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512IFMA | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xb5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADD52HUQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512IFMA | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb5) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMADD52HUQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512IFMA | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xb5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADD52HUQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512IFMA) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb5) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMADD52HUQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512IFMA) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMADD52HUQ") + } + return p +} + +// VPMADD52LUQ performs "Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit Products to Quadword Accumulators". 
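+//
+// Sketch (assumed names); identical in shape to VPMADD52HUQ above, but
+// accumulating the low 52 bits of each product:
+//
+//     p.VPMADD52LUQ(ZMM1, ZMM2, ZMM3)  // ZMM3 += low 52 bits of the per-qword products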
+// +// Mnemonic : VPMADD52LUQ +// Supported forms : (6 forms) +// +// * VPMADD52LUQ m128/m64bcst, xmm, xmm{k}{z} [AVX512IFMA,AVX512VL] +// * VPMADD52LUQ xmm, xmm, xmm{k}{z} [AVX512IFMA,AVX512VL] +// * VPMADD52LUQ m256/m64bcst, ymm, ymm{k}{z} [AVX512IFMA,AVX512VL] +// * VPMADD52LUQ ymm, ymm, ymm{k}{z} [AVX512IFMA,AVX512VL] +// * VPMADD52LUQ m512/m64bcst, zmm, zmm{k}{z} [AVX512IFMA] +// * VPMADD52LUQ zmm, zmm, zmm{k}{z} [AVX512IFMA] +// +func (self *Program) VPMADD52LUQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMADD52LUQ", 3, Operands { v0, v1, v2 }) + // VPMADD52LUQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512IFMA | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb4) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMADD52LUQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512IFMA | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xb4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADD52LUQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512IFMA | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb4) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMADD52LUQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512IFMA | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xb4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADD52LUQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512IFMA) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb4) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMADD52LUQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512IFMA) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMADD52LUQ") + } + return p +} + +// VPMADDUBSW performs "Multiply and Add Packed Signed and Unsigned Byte Integers". 
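+//
+// Sketch (assumed names): the AVX form pairs unsigned bytes from one source with
+// signed bytes from the other and sums adjacent products into saturated words:
+//
+//     p.VPMADDUBSW(XMM0, XMM1, XMM2)   // XMM2 receives the per-pair saturated word sums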
+// +// Mnemonic : VPMADDUBSW +// Supported forms : (10 forms) +// +// * VPMADDUBSW xmm, xmm, xmm [AVX] +// * VPMADDUBSW m128, xmm, xmm [AVX] +// * VPMADDUBSW ymm, ymm, ymm [AVX2] +// * VPMADDUBSW m256, ymm, ymm [AVX2] +// * VPMADDUBSW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMADDUBSW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMADDUBSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMADDUBSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMADDUBSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMADDUBSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMADDUBSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMADDUBSW", 3, Operands { v0, v1, v2 }) + // VPMADDUBSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDUBSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMADDUBSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDUBSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMADDUBSW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDUBSW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMADDUBSW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDUBSW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), 
addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMADDUBSW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDUBSW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMADDUBSW") + } + return p +} + +// VPMADDWD performs "Multiply and Add Packed Signed Word Integers". +// +// Mnemonic : VPMADDWD +// Supported forms : (10 forms) +// +// * VPMADDWD xmm, xmm, xmm [AVX] +// * VPMADDWD m128, xmm, xmm [AVX] +// * VPMADDWD ymm, ymm, ymm [AVX2] +// * VPMADDWD m256, ymm, ymm [AVX2] +// * VPMADDWD zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMADDWD m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMADDWD xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMADDWD m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMADDWD ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMADDWD m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMADDWD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMADDWD", 3, Operands { v0, v1, v2 }) + // VPMADDWD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDWD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMADDWD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDWD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMADDWD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDWD m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m 
*_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMADDWD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDWD m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMADDWD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDWD m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMADDWD") + } + return p +} + +// VPMASKMOVD performs "Conditional Move Packed Doubleword Integers". 
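+//
+// Hedged sketch: the masked-load form needs a memory operand; Ptr is assumed
+// here as this package's memory-operand constructor and is not defined in this
+// file:
+//
+//     p.VPMASKMOVD(Ptr(RDI, 0), XMM1, XMM2)   // load the dwords selected by XMM1's mask into XMM2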
+// +// Mnemonic : VPMASKMOVD +// Supported forms : (4 forms) +// +// * VPMASKMOVD m128, xmm, xmm [AVX2] +// * VPMASKMOVD m256, ymm, ymm [AVX2] +// * VPMASKMOVD xmm, xmm, m128 [AVX2] +// * VPMASKMOVD ymm, ymm, m256 [AVX2] +// +func (self *Program) VPMASKMOVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMASKMOVD", 3, Operands { v0, v1, v2 }) + // VPMASKMOVD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x8c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMASKMOVD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x8c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMASKMOVD xmm, xmm, m128 + if isXMM(v0) && isXMM(v1) && isM128(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[0]), addr(v[2]), hlcode(v[1])) + m.emit(0x8e) + m.mrsd(lcode(v[0]), addr(v[2]), 1) + }) + } + // VPMASKMOVD ymm, ymm, m256 + if isYMM(v0) && isYMM(v1) && isM256(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[0]), addr(v[2]), hlcode(v[1])) + m.emit(0x8e) + m.mrsd(lcode(v[0]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPMASKMOVD") + } + return p +} + +// VPMASKMOVQ performs "Conditional Move Packed Quadword Integers". +// +// Mnemonic : VPMASKMOVQ +// Supported forms : (4 forms) +// +// * VPMASKMOVQ m128, xmm, xmm [AVX2] +// * VPMASKMOVQ m256, ymm, ymm [AVX2] +// * VPMASKMOVQ xmm, xmm, m128 [AVX2] +// * VPMASKMOVQ ymm, ymm, m256 [AVX2] +// +func (self *Program) VPMASKMOVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMASKMOVQ", 3, Operands { v0, v1, v2 }) + // VPMASKMOVQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x8c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMASKMOVQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x8c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMASKMOVQ xmm, xmm, m128 + if isXMM(v0) && isXMM(v1) && isM128(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[0]), addr(v[2]), hlcode(v[1])) + m.emit(0x8e) + m.mrsd(lcode(v[0]), addr(v[2]), 1) + }) + } + // VPMASKMOVQ ymm, ymm, m256 + if isYMM(v0) && isYMM(v1) && isM256(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[0]), addr(v[2]), hlcode(v[1])) + m.emit(0x8e) + m.mrsd(lcode(v[0]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPMASKMOVQ") + } + return p +} + +// VPMAXSB performs "Maximum of Packed Signed Byte Integers". 
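+//
+// Sketch (assumed names): a plain AVX register form; the EVEX forms listed below
+// are matched by their own operand checks (isEVEXXMM/isXMMkz and friends):
+//
+//     p.VPMAXSB(XMM0, XMM1, XMM2)      // XMM2 = per-byte signed maximum of XMM1 and XMM0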
+// +// Mnemonic : VPMAXSB +// Supported forms : (10 forms) +// +// * VPMAXSB xmm, xmm, xmm [AVX] +// * VPMAXSB m128, xmm, xmm [AVX] +// * VPMAXSB ymm, ymm, ymm [AVX2] +// * VPMAXSB m256, ymm, ymm [AVX2] +// * VPMAXSB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMAXSB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMAXSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMAXSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMAXSB", 3, Operands { v0, v1, v2 }) + // VPMAXSB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x3c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXSB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x3c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXSB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMAXSB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3c) + 
m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMAXSB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMAXSB") + } + return p +} + +// VPMAXSD performs "Maximum of Packed Signed Doubleword Integers". +// +// Mnemonic : VPMAXSD +// Supported forms : (10 forms) +// +// * VPMAXSD xmm, xmm, xmm [AVX] +// * VPMAXSD m128, xmm, xmm [AVX] +// * VPMAXSD ymm, ymm, ymm [AVX2] +// * VPMAXSD m256, ymm, ymm [AVX2] +// * VPMAXSD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMAXSD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMAXSD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMAXSD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMAXSD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMAXSD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMAXSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMAXSD", 3, Operands { v0, v1, v2 }) + // VPMAXSD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXSD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXSD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMAXSD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMAXSD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMAXSD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMAXSD") + } + return p +} + +// VPMAXSQ performs "Maximum of Packed Signed Quadword Integers". 
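+//
+// Usage sketch (illustrative only; assumes a *Program p and this package's
+// ZMM register constants, destination operand last):
+//
+//     p.VPMAXSQ(ZMM0, ZMM1, ZMM2)    // zmm2 = lane-wise max(int64) of zmm1 and zmm0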
+// +// Mnemonic : VPMAXSQ +// Supported forms : (6 forms) +// +// * VPMAXSQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMAXSQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMAXSQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMAXSQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMAXSQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMAXSQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMAXSQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMAXSQ", 3, Operands { v0, v1, v2 }) + // VPMAXSQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMAXSQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMAXSQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMAXSQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMAXSQ") + } + return p +} + +// VPMAXSW performs "Maximum of Packed Signed Word Integers". 
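+//
+// Usage sketch (illustrative only; assumes a *Program p and this package's
+// XMM register constants, destination operand last):
+//
+//     p.VPMAXSW(XMM0, XMM1, XMM2)    // xmm2 = lane-wise max(int16) of xmm1 and xmm0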
+// +// Mnemonic : VPMAXSW +// Supported forms : (10 forms) +// +// * VPMAXSW xmm, xmm, xmm [AVX] +// * VPMAXSW m128, xmm, xmm [AVX] +// * VPMAXSW ymm, ymm, ymm [AVX2] +// * VPMAXSW m256, ymm, ymm [AVX2] +// * VPMAXSW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMAXSW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMAXSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMAXSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMAXSW", 3, Operands { v0, v1, v2 }) + // VPMAXSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xee) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xee) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xee) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xee) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXSW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xee) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xee) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMAXSW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xee) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xee) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMAXSW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL 
| ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xee) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xee) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMAXSW") + } + return p +} + +// VPMAXUB performs "Maximum of Packed Unsigned Byte Integers". +// +// Mnemonic : VPMAXUB +// Supported forms : (10 forms) +// +// * VPMAXUB xmm, xmm, xmm [AVX] +// * VPMAXUB m128, xmm, xmm [AVX] +// * VPMAXUB ymm, ymm, ymm [AVX2] +// * VPMAXUB m256, ymm, ymm [AVX2] +// * VPMAXUB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMAXUB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMAXUB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXUB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXUB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXUB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMAXUB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMAXUB", 3, Operands { v0, v1, v2 }) + // VPMAXUB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xde) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xde) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXUB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xde) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xde) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXUB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xde) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xde) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMAXUB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) 
{ + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xde) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xde) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMAXUB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xde) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xde) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMAXUB") + } + return p +} + +// VPMAXUD performs "Maximum of Packed Unsigned Doubleword Integers". +// +// Mnemonic : VPMAXUD +// Supported forms : (10 forms) +// +// * VPMAXUD xmm, xmm, xmm [AVX] +// * VPMAXUD m128, xmm, xmm [AVX] +// * VPMAXUD ymm, ymm, ymm [AVX2] +// * VPMAXUD m256, ymm, ymm [AVX2] +// * VPMAXUD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMAXUD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMAXUD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMAXUD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMAXUD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMAXUD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMAXUD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMAXUD", 3, Operands { v0, v1, v2 }) + // VPMAXUD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXUD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + 
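// Memory form: a three-byte VEX prefix (m.vex3), opcode 0x3f, then the memory ModRM. +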
p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXUD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3f) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMAXUD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3f) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMAXUD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3f) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMAXUD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMAXUD") + } + return p +} + +// VPMAXUQ performs "Maximum of Packed Unsigned Quadword Integers". 
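+//
+// Usage sketch (illustrative only; this instruction is EVEX-only, so AVX512F
+// is always required; register names assumed from this package):
+//
+//     p.VPMAXUQ(ZMM0, ZMM1, ZMM2)    // zmm2 = lane-wise max(uint64) of zmm1 and zmm0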
+// +// Mnemonic : VPMAXUQ +// Supported forms : (6 forms) +// +// * VPMAXUQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMAXUQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMAXUQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMAXUQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMAXUQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMAXUQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMAXUQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMAXUQ", 3, Operands { v0, v1, v2 }) + // VPMAXUQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3f) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMAXUQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3f) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMAXUQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3f) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMAXUQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMAXUQ") + } + return p +} + +// VPMAXUW performs "Maximum of Packed Unsigned Word Integers". 
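+//
+// Usage sketch (illustrative only; register names assumed from this package,
+// destination operand last):
+//
+//     p.VPMAXUW(YMM0, YMM1, YMM2)    // ymm2 = lane-wise max(uint16) of ymm1 and ymm0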
+// +// Mnemonic : VPMAXUW +// Supported forms : (10 forms) +// +// * VPMAXUW xmm, xmm, xmm [AVX] +// * VPMAXUW m128, xmm, xmm [AVX] +// * VPMAXUW ymm, ymm, ymm [AVX2] +// * VPMAXUW m256, ymm, ymm [AVX2] +// * VPMAXUW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMAXUW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMAXUW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXUW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXUW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXUW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMAXUW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMAXUW", 3, Operands { v0, v1, v2 }) + // VPMAXUW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXUW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXUW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3e) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMAXUW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3e) + 
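// Memory ModRM; the displacement is disp8-compressed with N = 16 to match the 128-bit operand. +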
m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMAXUW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3e) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMAXUW") + } + return p +} + +// VPMINSB performs "Minimum of Packed Signed Byte Integers". +// +// Mnemonic : VPMINSB +// Supported forms : (10 forms) +// +// * VPMINSB xmm, xmm, xmm [AVX] +// * VPMINSB m128, xmm, xmm [AVX] +// * VPMINSB ymm, ymm, ymm [AVX2] +// * VPMINSB m256, ymm, ymm [AVX2] +// * VPMINSB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMINSB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMINSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMINSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMINSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMINSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMINSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMINSB", 3, Operands { v0, v1, v2 }) + // VPMINSB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x38) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x38) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINSB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x38) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x38) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINSB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x38) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + 
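// Full 512-bit memory source: EVEX.512 encoding with a 64-byte disp8 scale. +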
self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x38) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMINSB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x38) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x38) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMINSB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x38) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x38) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMINSB") + } + return p +} + +// VPMINSD performs "Minimum of Packed Signed Doubleword Integers". 
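+//
+// Usage sketch (illustrative only; register names assumed from this package,
+// destination operand last):
+//
+//     p.VPMINSD(XMM0, XMM1, XMM2)    // xmm2 = lane-wise min(int32) of xmm1 and xmm0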
+// +// Mnemonic : VPMINSD +// Supported forms : (10 forms) +// +// * VPMINSD xmm, xmm, xmm [AVX] +// * VPMINSD m128, xmm, xmm [AVX] +// * VPMINSD ymm, ymm, ymm [AVX2] +// * VPMINSD m256, ymm, ymm [AVX2] +// * VPMINSD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMINSD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMINSD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMINSD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMINSD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMINSD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMINSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMINSD", 3, Operands { v0, v1, v2 }) + // VPMINSD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x39) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x39) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINSD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x39) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x39) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINSD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x39) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMINSD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x39) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x39) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMINSD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ 
(ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x39) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x39) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMINSD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x39) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMINSD") + } + return p +} + +// VPMINSQ performs "Minimum of Packed Signed Quadword Integers". +// +// Mnemonic : VPMINSQ +// Supported forms : (6 forms) +// +// * VPMINSQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMINSQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMINSQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMINSQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMINSQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMINSQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMINSQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMINSQ", 3, Operands { v0, v1, v2 }) + // VPMINSQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x39) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMINSQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x39) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x39) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMINSQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x39) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m 
*_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x39) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMINSQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x39) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMINSQ") + } + return p +} + +// VPMINSW performs "Minimum of Packed Signed Word Integers". +// +// Mnemonic : VPMINSW +// Supported forms : (10 forms) +// +// * VPMINSW xmm, xmm, xmm [AVX] +// * VPMINSW m128, xmm, xmm [AVX] +// * VPMINSW ymm, ymm, ymm [AVX2] +// * VPMINSW m256, ymm, ymm [AVX2] +// * VPMINSW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMINSW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMINSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMINSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMINSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMINSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMINSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMINSW", 3, Operands { v0, v1, v2 }) + // VPMINSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xea) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xea) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xea) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xea) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINSW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xea) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xea) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMINSW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + 
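// EVEX.128 register form; the write-mask and zeroing bits are taken from the {k}{z} destination operand. +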
p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xea) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xea) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMINSW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xea) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xea) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMINSW") + } + return p +} + +// VPMINUB performs "Minimum of Packed Unsigned Byte Integers". +// +// Mnemonic : VPMINUB +// Supported forms : (10 forms) +// +// * VPMINUB xmm, xmm, xmm [AVX] +// * VPMINUB m128, xmm, xmm [AVX] +// * VPMINUB ymm, ymm, ymm [AVX2] +// * VPMINUB m256, ymm, ymm [AVX2] +// * VPMINUB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMINUB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMINUB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMINUB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMINUB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMINUB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMINUB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMINUB", 3, Operands { v0, v1, v2 }) + // VPMINUB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xda) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xda) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINUB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xda) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xda) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINUB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) 
&& isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xda) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xda) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMINUB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xda) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xda) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMINUB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xda) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xda) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMINUB") + } + return p +} + +// VPMINUD performs "Minimum of Packed Unsigned Doubleword Integers". 
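+//
+// Usage sketch (illustrative only; register names assumed from this package,
+// destination operand last):
+//
+//     p.VPMINUD(YMM0, YMM1, YMM2)    // ymm2 = lane-wise min(uint32) of ymm1 and ymm0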
+// +// Mnemonic : VPMINUD +// Supported forms : (10 forms) +// +// * VPMINUD xmm, xmm, xmm [AVX] +// * VPMINUD m128, xmm, xmm [AVX] +// * VPMINUD ymm, ymm, ymm [AVX2] +// * VPMINUD m256, ymm, ymm [AVX2] +// * VPMINUD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMINUD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMINUD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMINUD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMINUD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMINUD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMINUD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMINUD", 3, Operands { v0, v1, v2 }) + // VPMINUD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINUD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINUD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3b) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMINUD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3b) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMINUD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ 
(ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3b) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMINUD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMINUD") + } + return p +} + +// VPMINUQ performs "Minimum of Packed Unsigned Quadword Integers". +// +// Mnemonic : VPMINUQ +// Supported forms : (6 forms) +// +// * VPMINUQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMINUQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMINUQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMINUQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMINUQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMINUQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMINUQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMINUQ", 3, Operands { v0, v1, v2 }) + // VPMINUQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3b) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMINUQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3b) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMINUQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m 
*_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3b) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMINUQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMINUQ") + } + return p +} + +// VPMINUW performs "Minimum of Packed Unsigned Word Integers". +// +// Mnemonic : VPMINUW +// Supported forms : (10 forms) +// +// * VPMINUW xmm, xmm, xmm [AVX] +// * VPMINUW m128, xmm, xmm [AVX] +// * VPMINUW ymm, ymm, ymm [AVX2] +// * VPMINUW m256, ymm, ymm [AVX2] +// * VPMINUW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMINUW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMINUW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMINUW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMINUW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMINUW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMINUW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMINUW", 3, Operands { v0, v1, v2 }) + // VPMINUW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINUW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINUW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3a) + 
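// Memory ModRM with disp8*N compression, N = 64 for the full 512-bit operand. +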
m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMINUW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3a) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMINUW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3a) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMINUW") + } + return p +} + +// VPMOVB2M performs "Move Signs of Packed Byte Integers to Mask Register". +// +// Mnemonic : VPMOVB2M +// Supported forms : (3 forms) +// +// * VPMOVB2M zmm, k [AVX512BW] +// * VPMOVB2M xmm, k [AVX512BW,AVX512VL] +// * VPMOVB2M ymm, k [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVB2M(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVB2M", 2, Operands { v0, v1 }) + // VPMOVB2M zmm, k + if isZMM(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x29) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVB2M xmm, k + if isEVEXXMM(v0) && isK(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x08) + m.emit(0x29) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVB2M ymm, k + if isEVEXYMM(v0) && isK(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x28) + m.emit(0x29) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVB2M") + } + return p +} + +// VPMOVD2M performs "Move Signs of Packed Doubleword Integers to Mask Register". 
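+// (Editor's note, not generated code: a scalar Go model of what the VPMOVB2M
+// forms above compute; bit i of the destination mask is the sign bit of
+// source byte i, and VPMOVD2M below is the same idea over doublewords:
+//
+//     // pmovb2mRef is a hypothetical reference helper, not part of iasm.
+//     func pmovb2mRef(src []byte) (mask uint64) {
+//         for i, b := range src {
+//             mask |= uint64(b>>7) << uint(i) // sign bit of byte i -> mask bit i
+//         }
+//         return
+//     }
+// )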
+// +// Mnemonic : VPMOVD2M +// Supported forms : (3 forms) +// +// * VPMOVD2M zmm, k [AVX512DQ] +// * VPMOVD2M xmm, k [AVX512DQ,AVX512VL] +// * VPMOVD2M ymm, k [AVX512DQ,AVX512VL] +// +func (self *Program) VPMOVD2M(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVD2M", 2, Operands { v0, v1 }) + // VPMOVD2M zmm, k + if isZMM(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVD2M xmm, k + if isEVEXXMM(v0) && isK(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x08) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVD2M ymm, k + if isEVEXYMM(v0) && isK(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x28) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVD2M") + } + return p +} + +// VPMOVDB performs "Down Convert Packed Doubleword Values to Byte Values with Truncation". +// +// Mnemonic : VPMOVDB +// Supported forms : (6 forms) +// +// * VPMOVDB zmm, xmm{k}{z} [AVX512F] +// * VPMOVDB zmm, m128{k}{z} [AVX512F] +// * VPMOVDB xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVDB xmm, m32{k}{z} [AVX512F,AVX512VL] +// * VPMOVDB ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVDB ymm, m64{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVDB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVDB", 2, Operands { v0, v1 }) + // VPMOVDB zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x31) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVDB zmm, m128{k}{z} + if isZMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x31) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VPMOVDB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x31) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVDB xmm, m32{k}{z} + if isEVEXXMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x31) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPMOVDB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + 
self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x31) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVDB ymm, m64{k}{z} + if isEVEXYMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x31) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVDB") + } + return p +} + +// VPMOVDW performs "Down Convert Packed Doubleword Values to Word Values with Truncation". +// +// Mnemonic : VPMOVDW +// Supported forms : (6 forms) +// +// * VPMOVDW zmm, ymm{k}{z} [AVX512F] +// * VPMOVDW zmm, m256{k}{z} [AVX512F] +// * VPMOVDW xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVDW xmm, m64{k}{z} [AVX512F,AVX512VL] +// * VPMOVDW ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVDW ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVDW", 2, Operands { v0, v1 }) + // VPMOVDW zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x33) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVDW zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x33) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVDW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x33) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVDW xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x33) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVDW ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x33) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVDW ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x33) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVDW") + } + 
return p +} + +// VPMOVM2B performs "Expand Bits of Mask Register to Packed Byte Integers". +// +// Mnemonic : VPMOVM2B +// Supported forms : (3 forms) +// +// * VPMOVM2B k, zmm [AVX512BW] +// * VPMOVM2B k, xmm [AVX512BW,AVX512VL] +// * VPMOVM2B k, ymm [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVM2B(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVM2B", 2, Operands { v0, v1 }) + // VPMOVM2B k, zmm + if isK(v0) && isZMM(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVM2B k, xmm + if isK(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x08) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVM2B k, ymm + if isK(v0) && isEVEXYMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x28) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVM2B") + } + return p +} + +// VPMOVM2D performs "Expand Bits of Mask Register to Packed Doubleword Integers". +// +// Mnemonic : VPMOVM2D +// Supported forms : (3 forms) +// +// * VPMOVM2D k, zmm [AVX512DQ] +// * VPMOVM2D k, xmm [AVX512DQ,AVX512VL] +// * VPMOVM2D k, ymm [AVX512DQ,AVX512VL] +// +func (self *Program) VPMOVM2D(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVM2D", 2, Operands { v0, v1 }) + // VPMOVM2D k, zmm + if isK(v0) && isZMM(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x38) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVM2D k, xmm + if isK(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x08) + m.emit(0x38) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVM2D k, ymm + if isK(v0) && isEVEXYMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x28) + m.emit(0x38) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVM2D") + } + return p +} + +// VPMOVM2Q performs "Expand Bits of Mask Register to Packed Quadword Integers". 
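+// (Editor's note, not generated code: VPMOVM2B/VPMOVM2D above run in the
+// opposite direction of the VPMOV*2M family: each destination element becomes
+// all-ones when its mask bit is set, else all-zeros. A scalar Go model:
+//
+//     // pmovm2bRef is a hypothetical reference helper, not part of iasm.
+//     func pmovm2bRef(mask uint64, n int) []byte {
+//         dst := make([]byte, n)
+//         for i := range dst {
+//             if mask>>uint(i)&1 != 0 {
+//                 dst[i] = 0xff // mask bit set -> element all ones
+//             }
+//         }
+//         return dst
+//     }
+// )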
+// +// Mnemonic : VPMOVM2Q +// Supported forms : (3 forms) +// +// * VPMOVM2Q k, zmm [AVX512DQ] +// * VPMOVM2Q k, xmm [AVX512DQ,AVX512VL] +// * VPMOVM2Q k, ymm [AVX512DQ,AVX512VL] +// +func (self *Program) VPMOVM2Q(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVM2Q", 2, Operands { v0, v1 }) + // VPMOVM2Q k, zmm + if isK(v0) && isZMM(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x38) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVM2Q k, xmm + if isK(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x08) + m.emit(0x38) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVM2Q k, ymm + if isK(v0) && isEVEXYMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x28) + m.emit(0x38) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVM2Q") + } + return p +} + +// VPMOVM2W performs "Expand Bits of Mask Register to Packed Word Integers". +// +// Mnemonic : VPMOVM2W +// Supported forms : (3 forms) +// +// * VPMOVM2W k, zmm [AVX512BW] +// * VPMOVM2W k, xmm [AVX512BW,AVX512VL] +// * VPMOVM2W k, ymm [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVM2W(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVM2W", 2, Operands { v0, v1 }) + // VPMOVM2W k, zmm + if isK(v0) && isZMM(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVM2W k, xmm + if isK(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x08) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVM2W k, ymm + if isK(v0) && isEVEXYMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x28) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVM2W") + } + return p +} + +// VPMOVMSKB performs "Move Byte Mask". 
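+// (Editor's note, not generated code: in the EVEX register-register forms
+// above, the raw m.emit calls spell out the 4-byte EVEX prefix, the opcode,
+// and the ModRM byte. Reading VPMOVM2W's 512-bit form byte by byte:
+//
+//     m.emit(0x62)         // EVEX escape byte
+//     m.emit(0xf2 ^ ...)   // P0: opcode map 0b10 (0F38); the XOR clears the
+//                          //     inverted register-extension bits when the
+//                          //     h/eh/ecode helpers report high registers
+//     m.emit(0xfe)         // P1: W=1, vvvv=1111 (unused), pp=10 (F3 prefix)
+//     m.emit(0x48)         // P2: z=0, L'L=10 (512-bit), aaa=000 (no mask)
+//     m.emit(0x28)         // opcode
+//     m.emit(0xc0 | ...)   // ModRM: mod=11 (register direct), reg, rm
+//
+// The 128- and 256-bit forms differ only in P2 (0x08 / 0x28 for L'L=00/01).
+// This is an editorial reading of the byte values, not documented API.)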
+// +// Mnemonic : VPMOVMSKB +// Supported forms : (2 forms) +// +// * VPMOVMSKB xmm, r32 [AVX] +// * VPMOVMSKB ymm, r32 [AVX2] +// +func (self *Program) VPMOVMSKB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVMSKB", 2, Operands { v0, v1 }) + // VPMOVMSKB xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0xd7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVMSKB ymm, r32 + if isYMM(v0) && isReg32(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0xd7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVMSKB") + } + return p +} + +// VPMOVQ2M performs "Move Signs of Packed Quadword Integers to Mask Register". +// +// Mnemonic : VPMOVQ2M +// Supported forms : (3 forms) +// +// * VPMOVQ2M zmm, k [AVX512DQ] +// * VPMOVQ2M xmm, k [AVX512DQ,AVX512VL] +// * VPMOVQ2M ymm, k [AVX512DQ,AVX512VL] +// +func (self *Program) VPMOVQ2M(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVQ2M", 2, Operands { v0, v1 }) + // VPMOVQ2M zmm, k + if isZMM(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVQ2M xmm, k + if isEVEXXMM(v0) && isK(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x08) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVQ2M ymm, k + if isEVEXYMM(v0) && isK(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x28) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVQ2M") + } + return p +} + +// VPMOVQB performs "Down Convert Packed Quadword Values to Byte Values with Truncation". 
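+// (Editor's note, not generated code: VPMOVMSKB above is VEX-encoded, and by
+// inspection the small constant handed to m.vex2 (and passed third to m.vex3
+// in the memory forms elsewhere) appears to pack the VEX L and pp fields:
+//
+//     0x01 -> L=0, pp=01 (0x66 prefix, 128-bit form)
+//     0x05 -> L=1, pp=01 (0x66 prefix, 256-bit form)
+//
+// hence vex2(1, ...) for the xmm form and vex2(5, ...) for ymm. This is an
+// editorial inference from the generated code, not documented API.)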
+// +// Mnemonic : VPMOVQB +// Supported forms : (6 forms) +// +// * VPMOVQB zmm, xmm{k}{z} [AVX512F] +// * VPMOVQB zmm, m64{k}{z} [AVX512F] +// * VPMOVQB xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVQB xmm, m16{k}{z} [AVX512F,AVX512VL] +// * VPMOVQB ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVQB ymm, m32{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVQB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVQB", 2, Operands { v0, v1 }) + // VPMOVQB zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x32) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQB zmm, m64{k}{z} + if isZMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x32) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVQB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x32) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQB xmm, m16{k}{z} + if isEVEXXMM(v0) && isM16kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x32) + m.mrsd(lcode(v[0]), addr(v[1]), 2) + }) + } + // VPMOVQB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x32) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQB ymm, m32{k}{z} + if isEVEXYMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x32) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVQB") + } + return p +} + +// VPMOVQD performs "Down Convert Packed Quadword Values to Doubleword Values with Truncation". 
+// +// Mnemonic : VPMOVQD +// Supported forms : (6 forms) +// +// * VPMOVQD zmm, ymm{k}{z} [AVX512F] +// * VPMOVQD zmm, m256{k}{z} [AVX512F] +// * VPMOVQD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVQD xmm, m64{k}{z} [AVX512F,AVX512VL] +// * VPMOVQD ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVQD ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVQD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVQD", 2, Operands { v0, v1 }) + // VPMOVQD zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x35) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQD zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x35) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVQD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x35) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQD xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x35) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVQD ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x35) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQD ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x35) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVQD") + } + return p +} + +// VPMOVQW performs "Down Convert Packed Quadword Values to Word Values with Truncation". 
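+// (Editor's note, not generated code: in the EVEX memory forms above, the
+// trailing constant passed to m.mrsd (4, 8, 16, 32 ...) tracks the memory
+// operand size and matches the EVEX "disp8*N" compression factor, while the
+// VEX forms pass 1 (no compression). A sketch of that compression rule,
+// assuming this is what mrsd implements internally:
+//
+//     // compressDisp8 is a hypothetical illustration, not part of iasm.
+//     func compressDisp8(disp, n int32) (int8, bool) {
+//         if disp%n == 0 && disp/n >= -128 && disp/n <= 127 {
+//             return int8(disp / n), true // fits in a scaled 8-bit displacement
+//         }
+//         return 0, false // otherwise a full 32-bit displacement is needed
+//     }
+// )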
+// +// Mnemonic : VPMOVQW +// Supported forms : (6 forms) +// +// * VPMOVQW zmm, xmm{k}{z} [AVX512F] +// * VPMOVQW zmm, m128{k}{z} [AVX512F] +// * VPMOVQW xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVQW xmm, m32{k}{z} [AVX512F,AVX512VL] +// * VPMOVQW ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVQW ymm, m64{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVQW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVQW", 2, Operands { v0, v1 }) + // VPMOVQW zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x34) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQW zmm, m128{k}{z} + if isZMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x34) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VPMOVQW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x34) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQW xmm, m32{k}{z} + if isEVEXXMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x34) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPMOVQW ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x34) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQW ymm, m64{k}{z} + if isEVEXYMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x34) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVQW") + } + return p +} + +// VPMOVSDB performs "Down Convert Packed Doubleword Values to Byte Values with Signed Saturation". 
+// +// Mnemonic : VPMOVSDB +// Supported forms : (6 forms) +// +// * VPMOVSDB zmm, xmm{k}{z} [AVX512F] +// * VPMOVSDB zmm, m128{k}{z} [AVX512F] +// * VPMOVSDB xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSDB xmm, m32{k}{z} [AVX512F,AVX512VL] +// * VPMOVSDB ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSDB ymm, m64{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSDB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSDB", 2, Operands { v0, v1 }) + // VPMOVSDB zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x21) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSDB zmm, m128{k}{z} + if isZMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x21) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VPMOVSDB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x21) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSDB xmm, m32{k}{z} + if isEVEXXMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x21) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPMOVSDB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x21) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSDB ymm, m64{k}{z} + if isEVEXYMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x21) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSDB") + } + return p +} + +// VPMOVSDW performs "Down Convert Packed Doubleword Values to Word Values with Signed Saturation". 
+// +// Mnemonic : VPMOVSDW +// Supported forms : (6 forms) +// +// * VPMOVSDW zmm, ymm{k}{z} [AVX512F] +// * VPMOVSDW zmm, m256{k}{z} [AVX512F] +// * VPMOVSDW xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSDW xmm, m64{k}{z} [AVX512F,AVX512VL] +// * VPMOVSDW ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSDW ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSDW", 2, Operands { v0, v1 }) + // VPMOVSDW zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x23) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSDW zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x23) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVSDW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x23) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSDW xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x23) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVSDW ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x23) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSDW ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x23) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSDW") + } + return p +} + +// VPMOVSQB performs "Down Convert Packed Quadword Values to Byte Values with Signed Saturation". 
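+// (Editor's note, not generated code: unlike the plain truncating VPMOVDW and
+// VPMOVQW forms earlier, the VPMOVS* family saturates. For the
+// doubleword-to-word case above, each element is clamped to the int16 range
+// before narrowing:
+//
+//     // satDWtoW is a hypothetical reference helper, not part of iasm.
+//     func satDWtoW(x int32) int16 {
+//         if x > 32767 {
+//             return 32767 // 0x7FFF, positive saturation
+//         }
+//         if x < -32768 {
+//             return -32768 // 0x8000, negative saturation
+//         }
+//         return int16(x)
+//     }
+// )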
+// +// Mnemonic : VPMOVSQB +// Supported forms : (6 forms) +// +// * VPMOVSQB zmm, xmm{k}{z} [AVX512F] +// * VPMOVSQB zmm, m64{k}{z} [AVX512F] +// * VPMOVSQB xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQB xmm, m16{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQB ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQB ymm, m32{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSQB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSQB", 2, Operands { v0, v1 }) + // VPMOVSQB zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x22) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQB zmm, m64{k}{z} + if isZMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x22) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVSQB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x22) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQB xmm, m16{k}{z} + if isEVEXXMM(v0) && isM16kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x22) + m.mrsd(lcode(v[0]), addr(v[1]), 2) + }) + } + // VPMOVSQB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x22) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQB ymm, m32{k}{z} + if isEVEXYMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x22) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSQB") + } + return p +} + +// VPMOVSQD performs "Down Convert Packed Quadword Values to Doubleword Values with Signed Saturation". 
+// +// Mnemonic : VPMOVSQD +// Supported forms : (6 forms) +// +// * VPMOVSQD zmm, ymm{k}{z} [AVX512F] +// * VPMOVSQD zmm, m256{k}{z} [AVX512F] +// * VPMOVSQD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQD xmm, m64{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQD ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQD ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSQD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSQD", 2, Operands { v0, v1 }) + // VPMOVSQD zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x25) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQD zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x25) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVSQD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x25) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQD xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x25) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVSQD ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x25) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQD ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x25) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSQD") + } + return p +} + +// VPMOVSQW performs "Down Convert Packed Quadword Values to Word Values with Signed Saturation". 
+// +// Mnemonic : VPMOVSQW +// Supported forms : (6 forms) +// +// * VPMOVSQW zmm, xmm{k}{z} [AVX512F] +// * VPMOVSQW zmm, m128{k}{z} [AVX512F] +// * VPMOVSQW xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQW xmm, m32{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQW ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQW ymm, m64{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSQW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSQW", 2, Operands { v0, v1 }) + // VPMOVSQW zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x24) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQW zmm, m128{k}{z} + if isZMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x24) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VPMOVSQW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x24) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQW xmm, m32{k}{z} + if isEVEXXMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x24) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPMOVSQW ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x24) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQW ymm, m64{k}{z} + if isEVEXYMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x24) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSQW") + } + return p +} + +// VPMOVSWB performs "Down Convert Packed Word Values to Byte Values with Signed Saturation". 
+// +// Mnemonic : VPMOVSWB +// Supported forms : (6 forms) +// +// * VPMOVSWB zmm, ymm{k}{z} [AVX512BW] +// * VPMOVSWB zmm, m256{k}{z} [AVX512BW] +// * VPMOVSWB xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVSWB xmm, m64{k}{z} [AVX512BW,AVX512VL] +// * VPMOVSWB ymm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVSWB ymm, m128{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVSWB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSWB", 2, Operands { v0, v1 }) + // VPMOVSWB zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x20) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSWB zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x20) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVSWB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x20) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSWB xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x20) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVSWB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x20) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSWB ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x20) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSWB") + } + return p +} + +// VPMOVSXBD performs "Move Packed Byte Integers to Doubleword Integers with Sign Extension". 
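+// (Editor's note, not generated code: every generated method in this file
+// follows the dispatch shape visible above: probe the operands against each
+// supported form via type predicates (isXMM, isM64, isZMMkz, ...), p.add an
+// encoder closure for every match, and panic with "invalid operands for ..."
+// when nothing matched. Form selection is therefore driven purely by operand
+// types; e.g. a hypothetical call p.VPMOVSXBD(mem, zmm) selects the
+// "m128, zmm{k}{z}" form when mem satisfies isM128 and zmm satisfies
+// isZMMkz.)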
+// +// Mnemonic : VPMOVSXBD +// Supported forms : (10 forms) +// +// * VPMOVSXBD xmm, xmm [AVX] +// * VPMOVSXBD m32, xmm [AVX] +// * VPMOVSXBD xmm, ymm [AVX2] +// * VPMOVSXBD m64, ymm [AVX2] +// * VPMOVSXBD xmm, zmm{k}{z} [AVX512F] +// * VPMOVSXBD m128, zmm{k}{z} [AVX512F] +// * VPMOVSXBD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXBD xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXBD m32, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXBD m64, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSXBD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSXBD", 2, Operands { v0, v1 }) + // VPMOVSXBD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x21) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x21) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXBD xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x21) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBD m64, ymm + if isM64(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x21) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXBD xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x21) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBD m128, zmm{k}{z} + if isM128(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x21) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPMOVSXBD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x21) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBD xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x21) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBD m32, xmm{k}{z} + if isM32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 
zcode(v[1]), 0) + m.emit(0x21) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPMOVSXBD m64, ymm{k}{z} + if isM64(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x21) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSXBD") + } + return p +} + +// VPMOVSXBQ performs "Move Packed Byte Integers to Quadword Integers with Sign Extension". +// +// Mnemonic : VPMOVSXBQ +// Supported forms : (10 forms) +// +// * VPMOVSXBQ xmm, xmm [AVX] +// * VPMOVSXBQ m16, xmm [AVX] +// * VPMOVSXBQ xmm, ymm [AVX2] +// * VPMOVSXBQ m32, ymm [AVX2] +// * VPMOVSXBQ xmm, zmm{k}{z} [AVX512F] +// * VPMOVSXBQ m64, zmm{k}{z} [AVX512F] +// * VPMOVSXBQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXBQ xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXBQ m16, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXBQ m32, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSXBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSXBQ", 2, Operands { v0, v1 }) + // VPMOVSXBQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x22) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBQ m16, xmm + if isM16(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x22) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXBQ xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x22) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBQ m32, ymm + if isM32(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x22) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXBQ xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x22) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBQ m64, zmm{k}{z} + if isM64(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x22) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPMOVSXBQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x22) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBQ xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | 
ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x22) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBQ m16, xmm{k}{z} + if isM16(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x22) + m.mrsd(lcode(v[1]), addr(v[0]), 2) + }) + } + // VPMOVSXBQ m32, ymm{k}{z} + if isM32(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x22) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSXBQ") + } + return p +} + +// VPMOVSXBW performs "Move Packed Byte Integers to Word Integers with Sign Extension". +// +// Mnemonic : VPMOVSXBW +// Supported forms : (10 forms) +// +// * VPMOVSXBW xmm, xmm [AVX] +// * VPMOVSXBW m64, xmm [AVX] +// * VPMOVSXBW xmm, ymm [AVX2] +// * VPMOVSXBW m128, ymm [AVX2] +// * VPMOVSXBW ymm, zmm{k}{z} [AVX512BW] +// * VPMOVSXBW m256, zmm{k}{z} [AVX512BW] +// * VPMOVSXBW xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVSXBW xmm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVSXBW m64, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVSXBW m128, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVSXBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSXBW", 2, Operands { v0, v1 }) + // VPMOVSXBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x20) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBW m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x20) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXBW xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x20) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBW m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x20) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXBW ymm, zmm{k}{z} + if isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x20) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBW m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 
0) + m.emit(0x20) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPMOVSXBW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x20) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBW xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x20) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBW m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x20) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPMOVSXBW m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x20) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSXBW") + } + return p +} + +// VPMOVSXDQ performs "Move Packed Doubleword Integers to Quadword Integers with Sign Extension". +// +// Mnemonic : VPMOVSXDQ +// Supported forms : (10 forms) +// +// * VPMOVSXDQ xmm, xmm [AVX] +// * VPMOVSXDQ m64, xmm [AVX] +// * VPMOVSXDQ xmm, ymm [AVX2] +// * VPMOVSXDQ m128, ymm [AVX2] +// * VPMOVSXDQ ymm, zmm{k}{z} [AVX512F] +// * VPMOVSXDQ m256, zmm{k}{z} [AVX512F] +// * VPMOVSXDQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXDQ xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXDQ m64, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXDQ m128, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSXDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSXDQ", 2, Operands { v0, v1 }) + // VPMOVSXDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x25) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXDQ m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x25) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXDQ xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x25) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXDQ m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x25) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXDQ ymm, zmm{k}{z} + if isEVEXYMM(v0) && 
isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x25) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXDQ m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x25) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPMOVSXDQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x25) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXDQ xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x25) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXDQ m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x25) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPMOVSXDQ m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x25) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSXDQ") + } + return p +} + +// VPMOVSXWD performs "Move Packed Word Integers to Doubleword Integers with Sign Extension". 
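+//
+// A usage sketch (XMM1, YMM2 and XMM3 are assumed register constants;
+// mem64 stands for any operand the package's isM64 predicate accepts):
+//
+//     p.VPMOVSXWD(XMM1, YMM2)  // widen 8 int16 lanes to 8 int32 lanes
+//     p.VPMOVSXWD(mem64, XMM3) // load 4 words, sign-extend to dwords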
+// +// Mnemonic : VPMOVSXWD +// Supported forms : (10 forms) +// +// * VPMOVSXWD xmm, xmm [AVX] +// * VPMOVSXWD m64, xmm [AVX] +// * VPMOVSXWD xmm, ymm [AVX2] +// * VPMOVSXWD m128, ymm [AVX2] +// * VPMOVSXWD ymm, zmm{k}{z} [AVX512F] +// * VPMOVSXWD m256, zmm{k}{z} [AVX512F] +// * VPMOVSXWD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXWD xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXWD m64, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXWD m128, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSXWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSXWD", 2, Operands { v0, v1 }) + // VPMOVSXWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXWD xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWD m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXWD ymm, zmm{k}{z} + if isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWD m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPMOVSXWD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWD xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWD m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 
zcode(v[1]), 0) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPMOVSXWD m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSXWD") + } + return p +} + +// VPMOVSXWQ performs "Move Packed Word Integers to Quadword Integers with Sign Extension". +// +// Mnemonic : VPMOVSXWQ +// Supported forms : (10 forms) +// +// * VPMOVSXWQ xmm, xmm [AVX] +// * VPMOVSXWQ m32, xmm [AVX] +// * VPMOVSXWQ xmm, ymm [AVX2] +// * VPMOVSXWQ m64, ymm [AVX2] +// * VPMOVSXWQ xmm, zmm{k}{z} [AVX512F] +// * VPMOVSXWQ m128, zmm{k}{z} [AVX512F] +// * VPMOVSXWQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXWQ xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXWQ m32, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXWQ m64, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSXWQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSXWQ", 2, Operands { v0, v1 }) + // VPMOVSXWQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x24) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWQ m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x24) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXWQ xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x24) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWQ m64, ymm + if isM64(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x24) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXWQ xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x24) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWQ m128, zmm{k}{z} + if isM128(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x24) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPMOVSXWQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x24) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWQ xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + 
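// EVEX prefix layout for the register form below: 0x62 introduces the
+ // prefix, 0xf2 selects the 0F38 opcode map and folds in the inverted
+ // R/X/B/R' register-extension bits, 0x7d encodes W=0 with the 66h
+ // mandatory prefix, and 0x28 sets the 256-bit vector length next to
+ // the opmask (kcode) and zeroing (zcode) fields. +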
self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x24) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWQ m32, xmm{k}{z} + if isM32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x24) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPMOVSXWQ m64, ymm{k}{z} + if isM64(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x24) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSXWQ") + } + return p +} + +// VPMOVUSDB performs "Down Convert Packed Doubleword Values to Byte Values with Unsigned Saturation". +// +// Mnemonic : VPMOVUSDB +// Supported forms : (6 forms) +// +// * VPMOVUSDB zmm, xmm{k}{z} [AVX512F] +// * VPMOVUSDB zmm, m128{k}{z} [AVX512F] +// * VPMOVUSDB xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSDB xmm, m32{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSDB ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSDB ymm, m64{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVUSDB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVUSDB", 2, Operands { v0, v1 }) + // VPMOVUSDB zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSDB zmm, m128{k}{z} + if isZMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VPMOVUSDB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSDB xmm, m32{k}{z} + if isEVEXXMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPMOVUSDB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSDB ymm, m64{k}{z} + 
if isEVEXYMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVUSDB") + } + return p +} + +// VPMOVUSDW performs "Down Convert Packed Doubleword Values to Word Values with Unsigned Saturation". +// +// Mnemonic : VPMOVUSDW +// Supported forms : (6 forms) +// +// * VPMOVUSDW zmm, ymm{k}{z} [AVX512F] +// * VPMOVUSDW zmm, m256{k}{z} [AVX512F] +// * VPMOVUSDW xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSDW xmm, m64{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSDW ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSDW ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVUSDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVUSDW", 2, Operands { v0, v1 }) + // VPMOVUSDW zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x13) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSDW zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVUSDW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x13) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSDW xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVUSDW ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x13) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSDW ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVUSDW") + } + return p +} + +// VPMOVUSQB performs "Down Convert Packed Quadword Values to Byte Values with Unsigned Saturation". 
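+//
+// A usage sketch (ZMM1 and XMM2 are assumed register constants; mem64
+// stands for any operand that satisfies isM64kz):
+//
+//     p.VPMOVUSQB(ZMM1, XMM2)  // 8 uint64 lanes -> 8 saturated bytes
+//     p.VPMOVUSQB(ZMM1, mem64) // same conversion, stored to memory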
+// +// Mnemonic : VPMOVUSQB +// Supported forms : (6 forms) +// +// * VPMOVUSQB zmm, xmm{k}{z} [AVX512F] +// * VPMOVUSQB zmm, m64{k}{z} [AVX512F] +// * VPMOVUSQB xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQB xmm, m16{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQB ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQB ymm, m32{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVUSQB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVUSQB", 2, Operands { v0, v1 }) + // VPMOVUSQB zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x12) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQB zmm, m64{k}{z} + if isZMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVUSQB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x12) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQB xmm, m16{k}{z} + if isEVEXXMM(v0) && isM16kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[0]), addr(v[1]), 2) + }) + } + // VPMOVUSQB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x12) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQB ymm, m32{k}{z} + if isEVEXYMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVUSQB") + } + return p +} + +// VPMOVUSQD performs "Down Convert Packed Quadword Values to Doubleword Values with Unsigned Saturation". 
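+//
+// A usage sketch (YMM0 and XMM3 are assumed register constants):
+//
+//     p.VPMOVUSQD(YMM0, XMM3) // 4 uint64 lanes -> 4 saturated uint32 lanes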
+// +// Mnemonic : VPMOVUSQD +// Supported forms : (6 forms) +// +// * VPMOVUSQD zmm, ymm{k}{z} [AVX512F] +// * VPMOVUSQD zmm, m256{k}{z} [AVX512F] +// * VPMOVUSQD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQD xmm, m64{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQD ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQD ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVUSQD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVUSQD", 2, Operands { v0, v1 }) + // VPMOVUSQD zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x15) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQD zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x15) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVUSQD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x15) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQD xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x15) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVUSQD ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x15) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQD ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x15) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVUSQD") + } + return p +} + +// VPMOVUSQW performs "Down Convert Packed Quadword Values to Word Values with Unsigned Saturation". 
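+//
+// A usage sketch (ZMM2 and XMM4 are assumed register constants):
+//
+//     p.VPMOVUSQW(ZMM2, XMM4) // 8 uint64 lanes -> 8 saturated uint16 lanes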
+// +// Mnemonic : VPMOVUSQW +// Supported forms : (6 forms) +// +// * VPMOVUSQW zmm, xmm{k}{z} [AVX512F] +// * VPMOVUSQW zmm, m128{k}{z} [AVX512F] +// * VPMOVUSQW xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQW xmm, m32{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQW ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQW ymm, m64{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVUSQW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVUSQW", 2, Operands { v0, v1 }) + // VPMOVUSQW zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x14) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQW zmm, m128{k}{z} + if isZMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x14) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VPMOVUSQW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x14) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQW xmm, m32{k}{z} + if isEVEXXMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x14) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPMOVUSQW ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x14) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQW ymm, m64{k}{z} + if isEVEXYMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x14) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVUSQW") + } + return p +} + +// VPMOVUSWB performs "Down Convert Packed Word Values to Byte Values with Unsigned Saturation". 
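+//
+// A usage sketch (ZMM0 and YMM1 are assumed register constants):
+//
+//     p.VPMOVUSWB(ZMM0, YMM1) // 32 uint16 lanes -> 32 saturated bytes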
+// +// Mnemonic : VPMOVUSWB +// Supported forms : (6 forms) +// +// * VPMOVUSWB zmm, ymm{k}{z} [AVX512BW] +// * VPMOVUSWB zmm, m256{k}{z} [AVX512BW] +// * VPMOVUSWB xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVUSWB xmm, m64{k}{z} [AVX512BW,AVX512VL] +// * VPMOVUSWB ymm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVUSWB ymm, m128{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVUSWB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVUSWB", 2, Operands { v0, v1 }) + // VPMOVUSWB zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x10) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSWB zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVUSWB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x10) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSWB xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVUSWB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x10) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSWB ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVUSWB") + } + return p +} + +// VPMOVW2M performs "Move Signs of Packed Word Integers to Mask Register". 
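+//
+// A usage sketch (ZMM5 and the mask register constant K1 are assumed):
+//
+//     p.VPMOVW2M(ZMM5, K1) // K1 bit i = sign bit of word lane i of ZMM5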
+// +// Mnemonic : VPMOVW2M +// Supported forms : (3 forms) +// +// * VPMOVW2M zmm, k [AVX512BW] +// * VPMOVW2M xmm, k [AVX512BW,AVX512VL] +// * VPMOVW2M ymm, k [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVW2M(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVW2M", 2, Operands { v0, v1 }) + // VPMOVW2M zmm, k + if isZMM(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x29) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVW2M xmm, k + if isEVEXXMM(v0) && isK(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x08) + m.emit(0x29) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVW2M ymm, k + if isEVEXYMM(v0) && isK(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x28) + m.emit(0x29) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVW2M") + } + return p +} + +// VPMOVWB performs "Down Convert Packed Word Values to Byte Values with Truncation". +// +// Mnemonic : VPMOVWB +// Supported forms : (6 forms) +// +// * VPMOVWB zmm, ymm{k}{z} [AVX512BW] +// * VPMOVWB zmm, m256{k}{z} [AVX512BW] +// * VPMOVWB xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVWB xmm, m64{k}{z} [AVX512BW,AVX512VL] +// * VPMOVWB ymm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVWB ymm, m128{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVWB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVWB", 2, Operands { v0, v1 }) + // VPMOVWB zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x30) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVWB zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x30) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVWB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x30) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVWB xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x30) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVWB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + 
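// Operand roles are reversed relative to the widening moves: the
+ // down-converts place the source register in ModRM.reg and the
+ // destination in ModRM.rm, and the 0x7e byte selects the F3h
+ // mandatory prefix rather than 66h. +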
self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x30) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVWB ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x30) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVWB") + } + return p +} + +// VPMOVZXBD performs "Move Packed Byte Integers to Doubleword Integers with Zero Extension". +// +// Mnemonic : VPMOVZXBD +// Supported forms : (10 forms) +// +// * VPMOVZXBD xmm, xmm [AVX] +// * VPMOVZXBD m32, xmm [AVX] +// * VPMOVZXBD xmm, ymm [AVX2] +// * VPMOVZXBD m64, ymm [AVX2] +// * VPMOVZXBD xmm, zmm{k}{z} [AVX512F] +// * VPMOVZXBD m128, zmm{k}{z} [AVX512F] +// * VPMOVZXBD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXBD xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXBD m32, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXBD m64, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVZXBD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVZXBD", 2, Operands { v0, v1 }) + // VPMOVZXBD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x31) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x31) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXBD xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x31) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBD m64, ymm + if isM64(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x31) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXBD xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x31) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBD m128, zmm{k}{z} + if isM128(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x31) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPMOVZXBD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 
7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x31) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBD xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x31) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBD m32, xmm{k}{z} + if isM32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x31) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPMOVZXBD m64, ymm{k}{z} + if isM64(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x31) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVZXBD") + } + return p +} + +// VPMOVZXBQ performs "Move Packed Byte Integers to Quadword Integers with Zero Extension". +// +// Mnemonic : VPMOVZXBQ +// Supported forms : (10 forms) +// +// * VPMOVZXBQ xmm, xmm [AVX] +// * VPMOVZXBQ m16, xmm [AVX] +// * VPMOVZXBQ xmm, ymm [AVX2] +// * VPMOVZXBQ m32, ymm [AVX2] +// * VPMOVZXBQ xmm, zmm{k}{z} [AVX512F] +// * VPMOVZXBQ m64, zmm{k}{z} [AVX512F] +// * VPMOVZXBQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXBQ xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXBQ m16, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXBQ m32, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVZXBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVZXBQ", 2, Operands { v0, v1 }) + // VPMOVZXBQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x32) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBQ m16, xmm + if isM16(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x32) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXBQ xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x32) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBQ m32, ymm + if isM32(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x32) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXBQ xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x32) + m.emit(0xc0 | lcode(v[1]) << 
3 | lcode(v[0])) + }) + } + // VPMOVZXBQ m64, zmm{k}{z} + if isM64(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x32) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPMOVZXBQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x32) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBQ xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x32) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBQ m16, xmm{k}{z} + if isM16(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x32) + m.mrsd(lcode(v[1]), addr(v[0]), 2) + }) + } + // VPMOVZXBQ m32, ymm{k}{z} + if isM32(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x32) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVZXBQ") + } + return p +} + +// VPMOVZXBW performs "Move Packed Byte Integers to Word Integers with Zero Extension". 
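+//
+// A usage sketch (XMM0 and YMM1 are assumed register constants):
+//
+//     p.VPMOVZXBW(XMM0, YMM1) // zero-extend 16 bytes to 16 uint16 lanes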
+// +// Mnemonic : VPMOVZXBW +// Supported forms : (10 forms) +// +// * VPMOVZXBW xmm, xmm [AVX] +// * VPMOVZXBW m64, xmm [AVX] +// * VPMOVZXBW xmm, ymm [AVX2] +// * VPMOVZXBW m128, ymm [AVX2] +// * VPMOVZXBW ymm, zmm{k}{z} [AVX512BW] +// * VPMOVZXBW m256, zmm{k}{z} [AVX512BW] +// * VPMOVZXBW xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVZXBW xmm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVZXBW m64, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVZXBW m128, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVZXBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVZXBW", 2, Operands { v0, v1 }) + // VPMOVZXBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x30) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBW m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x30) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXBW xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x30) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBW m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x30) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXBW ymm, zmm{k}{z} + if isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x30) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBW m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x30) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPMOVZXBW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x30) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBW xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x30) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBW m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, 
kcode(v[1]), zcode(v[1]), 0) + m.emit(0x30) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPMOVZXBW m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x30) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVZXBW") + } + return p +} + +// VPMOVZXDQ performs "Move Packed Doubleword Integers to Quadword Integers with Zero Extension". +// +// Mnemonic : VPMOVZXDQ +// Supported forms : (10 forms) +// +// * VPMOVZXDQ xmm, xmm [AVX] +// * VPMOVZXDQ m64, xmm [AVX] +// * VPMOVZXDQ xmm, ymm [AVX2] +// * VPMOVZXDQ m128, ymm [AVX2] +// * VPMOVZXDQ ymm, zmm{k}{z} [AVX512F] +// * VPMOVZXDQ m256, zmm{k}{z} [AVX512F] +// * VPMOVZXDQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXDQ xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXDQ m64, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXDQ m128, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVZXDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVZXDQ", 2, Operands { v0, v1 }) + // VPMOVZXDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x35) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXDQ m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x35) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXDQ xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x35) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXDQ m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x35) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXDQ ymm, zmm{k}{z} + if isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x35) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXDQ m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x35) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPMOVZXDQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x35) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXDQ xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { 
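+ // The register forms spell out the EVEX prefix byte-by-byte; the memory
+ // forms below go through m.evex/m.mrsd instead, where the final mrsd
+ // argument (8 for m64, 16 for m128) is the disp8 compression unit.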
+ self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x35) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXDQ m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x35) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPMOVZXDQ m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x35) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVZXDQ") + } + return p +} + +// VPMOVZXWD performs "Move Packed Word Integers to Doubleword Integers with Zero Extension". +// +// Mnemonic : VPMOVZXWD +// Supported forms : (10 forms) +// +// * VPMOVZXWD xmm, xmm [AVX] +// * VPMOVZXWD m64, xmm [AVX] +// * VPMOVZXWD xmm, ymm [AVX2] +// * VPMOVZXWD m128, ymm [AVX2] +// * VPMOVZXWD ymm, zmm{k}{z} [AVX512F] +// * VPMOVZXWD m256, zmm{k}{z} [AVX512F] +// * VPMOVZXWD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXWD xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXWD m64, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXWD m128, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVZXWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVZXWD", 2, Operands { v0, v1 }) + // VPMOVZXWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXWD xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWD m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXWD ymm, zmm{k}{z} + if isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWD m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), 
addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPMOVZXWD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWD xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWD m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPMOVZXWD m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVZXWD") + } + return p +} + +// VPMOVZXWQ performs "Move Packed Word Integers to Quadword Integers with Zero Extension". +// +// Mnemonic : VPMOVZXWQ +// Supported forms : (10 forms) +// +// * VPMOVZXWQ xmm, xmm [AVX] +// * VPMOVZXWQ m32, xmm [AVX] +// * VPMOVZXWQ xmm, ymm [AVX2] +// * VPMOVZXWQ m64, ymm [AVX2] +// * VPMOVZXWQ xmm, zmm{k}{z} [AVX512F] +// * VPMOVZXWQ m128, zmm{k}{z} [AVX512F] +// * VPMOVZXWQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXWQ xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXWQ m32, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXWQ m64, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVZXWQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVZXWQ", 2, Operands { v0, v1 }) + // VPMOVZXWQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x34) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWQ m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x34) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXWQ xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x34) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWQ m64, ymm + if isM64(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x34) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXWQ xmm, zmm{k}{z} + 
if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x34) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWQ m128, zmm{k}{z} + if isM128(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x34) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPMOVZXWQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x34) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWQ xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x34) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWQ m32, xmm{k}{z} + if isM32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x34) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPMOVZXWQ m64, ymm{k}{z} + if isM64(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x34) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVZXWQ") + } + return p +} + +// VPMULDQ performs "Multiply Packed Signed Doubleword Integers and Store Quadword Result". 
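+//
+// A usage sketch (XMM0 through XMM2 are assumed register constants; note
+// the source-first operand order used throughout this package):
+//
+//     p.VPMULDQ(XMM0, XMM1, XMM2) // XMM2 = even int32 lanes of XMM0 * XMM1, as int64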
+// +// Mnemonic : VPMULDQ +// Supported forms : (10 forms) +// +// * VPMULDQ xmm, xmm, xmm [AVX] +// * VPMULDQ m128, xmm, xmm [AVX] +// * VPMULDQ ymm, ymm, ymm [AVX2] +// * VPMULDQ m256, ymm, ymm [AVX2] +// * VPMULDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMULDQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMULDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMULDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMULDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMULDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMULDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULDQ", 3, Operands { v0, v1, v2 }) + // VPMULDQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x28) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULDQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x28) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULDQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x28) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULDQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x28) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULDQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x28) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULDQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x28) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULDQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x28) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULDQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ 
(ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x28) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULDQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x28) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMULDQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x28) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULDQ") + } + return p +} + +// VPMULHRSW performs "Packed Multiply Signed Word Integers and Store High Result with Round and Scale". +// +// Mnemonic : VPMULHRSW +// Supported forms : (10 forms) +// +// * VPMULHRSW xmm, xmm, xmm [AVX] +// * VPMULHRSW m128, xmm, xmm [AVX] +// * VPMULHRSW ymm, ymm, ymm [AVX2] +// * VPMULHRSW m256, ymm, ymm [AVX2] +// * VPMULHRSW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMULHRSW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMULHRSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHRSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHRSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHRSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMULHRSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULHRSW", 3, Operands { v0, v1, v2 }) + // VPMULHRSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHRSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x0b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULHRSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHRSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x0b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULHRSW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + 
m.emit(0x0b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHRSW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x0b) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULHRSW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHRSW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x0b) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULHRSW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHRSW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x0b) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULHRSW") + } + return p +} + +// VPMULHUW performs "Multiply Packed Unsigned Word Integers and Store High Result". 
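+//
+// Form selection happens at run time: the operand types are matched against
+// the list below and every applicable encoding is recorded; if nothing
+// matches, the call panics with "invalid operands for VPMULHUW". A minimal
+// sketch (register constants assumed from this package):
+//
+//     p.VPMULHUW(YMM1, YMM2, YMM3)   // selects the AVX2 ymm form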
+// +// Mnemonic : VPMULHUW +// Supported forms : (10 forms) +// +// * VPMULHUW xmm, xmm, xmm [AVX] +// * VPMULHUW m128, xmm, xmm [AVX] +// * VPMULHUW ymm, ymm, ymm [AVX2] +// * VPMULHUW m256, ymm, ymm [AVX2] +// * VPMULHUW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMULHUW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMULHUW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHUW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHUW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHUW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMULHUW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULHUW", 3, Operands { v0, v1, v2 }) + // VPMULHUW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHUW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe4) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULHUW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHUW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe4) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULHUW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHUW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe4) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULHUW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHUW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe4) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULHUW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + 
self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHUW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe4) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULHUW") + } + return p +} + +// VPMULHW performs "Multiply Packed Signed Word Integers and Store High Result". +// +// Mnemonic : VPMULHW +// Supported forms : (10 forms) +// +// * VPMULHW xmm, xmm, xmm [AVX] +// * VPMULHW m128, xmm, xmm [AVX] +// * VPMULHW ymm, ymm, ymm [AVX2] +// * VPMULHW m256, ymm, ymm [AVX2] +// * VPMULHW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMULHW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMULHW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMULHW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULHW", 3, Operands { v0, v1, v2 }) + // VPMULHW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULHW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULHW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe5) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULHW xmm, xmm, xmm{k}{z} + if 
isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe5) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULHW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe5) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULHW") + } + return p +} + +// VPMULLD performs "Multiply Packed Signed Doubleword Integers and Store Low Result". 
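+//
+// The EVEX forms below also take a broadcast memory operand (m32bcst), which
+// multiplies one doubleword against every lane. A sketch, assuming the ZMM
+// register constants defined by this package:
+//
+//     p.VPMULLD(ZMM1, ZMM2, ZMM3)    // keep the low 32 bits of each 32x32 product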
+// +// Mnemonic : VPMULLD +// Supported forms : (10 forms) +// +// * VPMULLD xmm, xmm, xmm [AVX] +// * VPMULLD m128, xmm, xmm [AVX] +// * VPMULLD ymm, ymm, ymm [AVX2] +// * VPMULLD m256, ymm, ymm [AVX2] +// * VPMULLD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMULLD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMULLD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMULLD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMULLD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMULLD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMULLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULLD", 3, Operands { v0, v1, v2 }) + // VPMULLD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULLD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULLD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULLD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULLD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ 
(ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMULLD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULLD") + } + return p +} + +// VPMULLQ performs "Multiply Packed Signed Quadword Integers and Store Low Result". +// +// Mnemonic : VPMULLQ +// Supported forms : (6 forms) +// +// * VPMULLQ m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VPMULLQ zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VPMULLQ m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VPMULLQ xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VPMULLQ m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VPMULLQ ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VPMULLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULLQ", 3, Operands { v0, v1, v2 }) + // VPMULLQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULLQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULLQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = 
DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMULLQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULLQ") + } + return p +} + +// VPMULLW performs "Multiply Packed Signed Word Integers and Store Low Result". +// +// Mnemonic : VPMULLW +// Supported forms : (10 forms) +// +// * VPMULLW xmm, xmm, xmm [AVX] +// * VPMULLW m128, xmm, xmm [AVX] +// * VPMULLW ymm, ymm, ymm [AVX2] +// * VPMULLW m256, ymm, ymm [AVX2] +// * VPMULLW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMULLW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMULLW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMULLW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMULLW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMULLW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMULLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULLW", 3, Operands { v0, v1, v2 }) + // VPMULLW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULLW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULLW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xd5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd5) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULLW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) 
{ + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xd5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd5) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULLW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xd5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd5) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULLW") + } + return p +} + +// VPMULTISHIFTQB performs "Select Packed Unaligned Bytes from Quadword Sources". +// +// Mnemonic : VPMULTISHIFTQB +// Supported forms : (6 forms) +// +// * VPMULTISHIFTQB m128/m64bcst, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL] +// * VPMULTISHIFTQB xmm, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL] +// * VPMULTISHIFTQB m256/m64bcst, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL] +// * VPMULTISHIFTQB ymm, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL] +// * VPMULTISHIFTQB m512/m64bcst, zmm, zmm{k}{z} [AVX512VBMI] +// * VPMULTISHIFTQB zmm, zmm, zmm{k}{z} [AVX512VBMI] +// +func (self *Program) VPMULTISHIFTQB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULTISHIFTQB", 3, Operands { v0, v1, v2 }) + // VPMULTISHIFTQB m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VBMI | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x83) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULTISHIFTQB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VBMI | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x83) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULTISHIFTQB m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VBMI | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), 
vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x83) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMULTISHIFTQB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VBMI | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x83) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULTISHIFTQB m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x83) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULTISHIFTQB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x83) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULTISHIFTQB") + } + return p +} + +// VPMULUDQ performs "Multiply Packed Unsigned Doubleword Integers". +// +// Mnemonic : VPMULUDQ +// Supported forms : (10 forms) +// +// * VPMULUDQ xmm, xmm, xmm [AVX] +// * VPMULUDQ m128, xmm, xmm [AVX] +// * VPMULUDQ ymm, ymm, ymm [AVX2] +// * VPMULUDQ m256, ymm, ymm [AVX2] +// * VPMULUDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMULUDQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMULUDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMULUDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMULUDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMULUDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMULUDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULUDQ", 3, Operands { v0, v1, v2 }) + // VPMULUDQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULUDQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf4) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULUDQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULUDQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf4) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULUDQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + 
self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xf4) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULUDQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xf4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULUDQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xf4) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULUDQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xf4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULUDQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xf4) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMULUDQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xf4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULUDQ") + } + return p +} + +// VPOPCNTD performs "Packed Population Count for Doubleword Integers". 
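+//
+// Both forms are ZMM-only and need AVX512VPOPCNTDQ, which the method records
+// via self.require before any encoding is chosen. A sketch (register
+// constants assumed from this package):
+//
+//     p.VPOPCNTD(ZMM1, ZMM2)         // zmm2[i] := popcount(zmm1[i]) per dword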
+// +// Mnemonic : VPOPCNTD +// Supported forms : (2 forms) +// +// * VPOPCNTD m512/m32bcst, zmm{k}{z} [AVX512VPOPCNTDQ] +// * VPOPCNTD zmm, zmm{k}{z} [AVX512VPOPCNTDQ] +// +func (self *Program) VPOPCNTD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPOPCNTD", 2, Operands { v0, v1 }) + // VPOPCNTD m512/m32bcst, zmm{k}{z} + if isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512VPOPCNTDQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x55) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPOPCNTD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512VPOPCNTDQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x55) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPOPCNTD") + } + return p +} + +// VPOPCNTQ performs "Packed Population Count for Quadword Integers". +// +// Mnemonic : VPOPCNTQ +// Supported forms : (2 forms) +// +// * VPOPCNTQ m512/m64bcst, zmm{k}{z} [AVX512VPOPCNTDQ] +// * VPOPCNTQ zmm, zmm{k}{z} [AVX512VPOPCNTDQ] +// +func (self *Program) VPOPCNTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPOPCNTQ", 2, Operands { v0, v1 }) + // VPOPCNTQ m512/m64bcst, zmm{k}{z} + if isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512VPOPCNTDQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x55) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPOPCNTQ zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512VPOPCNTDQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x55) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPOPCNTQ") + } + return p +} + +// VPOR performs "Packed Bitwise Logical OR". 
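+//
+// Note that VPOR has no EVEX encoding: for 512-bit registers or masked
+// destinations use VPORD or VPORQ below, the AVX-512 spellings of the same
+// bitwise OR. A sketch (register constants assumed from this package):
+//
+//     p.VPOR(XMM1, XMM2, XMM3)       // xmm3 := xmm2 | xmm1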
+// +// Mnemonic : VPOR +// Supported forms : (4 forms) +// +// * VPOR xmm, xmm, xmm [AVX] +// * VPOR m128, xmm, xmm [AVX] +// * VPOR ymm, ymm, ymm [AVX2] +// * VPOR m256, ymm, ymm [AVX2] +// +func (self *Program) VPOR(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPOR", 3, Operands { v0, v1, v2 }) + // VPOR xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPOR m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xeb) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPOR ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPOR m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xeb) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPOR") + } + return p +} + +// VPORD performs "Bitwise Logical OR of Packed Doubleword Integers". +// +// Mnemonic : VPORD +// Supported forms : (6 forms) +// +// * VPORD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPORD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPORD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPORD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPORD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPORD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPORD", 3, Operands { v0, v1, v2 }) + // VPORD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xeb) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPORD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPORD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xeb) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPORD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ 
((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPORD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xeb) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPORD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPORD") + } + return p +} + +// VPORQ performs "Bitwise Logical OR of Packed Quadword Integers". +// +// Mnemonic : VPORQ +// Supported forms : (6 forms) +// +// * VPORQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPORQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPORQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPORQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPORQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPORQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPORQ", 3, Operands { v0, v1, v2 }) + // VPORQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xeb) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPORQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPORQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xeb) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPORQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPORQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && 
isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xeb) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPORQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPORQ") + } + return p +} + +// VPPERM performs "Packed Permute Bytes". +// +// Mnemonic : VPPERM +// Supported forms : (3 forms) +// +// * VPPERM xmm, xmm, xmm, xmm [XOP] +// * VPPERM m128, xmm, xmm, xmm [XOP] +// * VPPERM xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPPERM(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPPERM", 4, Operands { v0, v1, v2, v3 }) + // VPPERM xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xa3) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[2]) << 3)) + m.emit(0xa3) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + } + // VPPERM m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x80, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0xa3) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VPPERM xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xa3) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPPERM") + } + return p +} + +// VPROLD performs "Rotate Packed Doubleword Left". 
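+//
+// The rotate count comes first as an 8-bit immediate (validated at run time
+// by isImm8). A sketch, assuming this package's register constants:
+//
+//     p.VPROLD(7, ZMM1, ZMM2)        // rotate every dword of zmm1 left by 7 into zmm2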
+// +// Mnemonic : VPROLD +// Supported forms : (6 forms) +// +// * VPROLD imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VPROLD imm8, zmm, zmm{k}{z} [AVX512F] +// * VPROLD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPROLD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPROLD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPROLD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPROLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPROLD", 3, Operands { v0, v1, v2 }) + // VPROLD imm8, m512/m32bcst, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(1, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x72) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLD imm8, m128/m32bcst, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(1, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLD imm8, m256/m32bcst, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(1, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLD imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x72) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x72) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPROLD") + } + return p +} + +// VPROLQ performs "Rotate Packed Quadword Left". 
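+//
+// Same shape as VPROLD but on 64-bit lanes; the hardware masks the immediate
+// to the lane width, i.e. the count is taken modulo 64. Sketch (register
+// constants assumed):
+//
+//     p.VPROLQ(1, ZMM1, ZMM2)        // rotate each qword left by one bit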
+// +// Mnemonic : VPROLQ +// Supported forms : (6 forms) +// +// * VPROLQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPROLQ imm8, zmm, zmm{k}{z} [AVX512F] +// * VPROLQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPROLQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPROLQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPROLQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPROLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPROLQ", 3, Operands { v0, v1, v2 }) + // VPROLQ imm8, m512/m64bcst, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(1, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLQ imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x72) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLQ imm8, m128/m64bcst, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(1, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLQ imm8, m256/m64bcst, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(1, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLQ imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x72) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLQ imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x72) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPROLQ") + } + return p +} + +// VPROLVD performs "Variable Rotate Packed Doubleword Left". 
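+//
+// Unlike VPROLD, the counts are not an immediate: the first operand is a
+// vector holding an independent rotate count for every doubleword lane.
+// Sketch (register constants assumed):
+//
+//     p.VPROLVD(ZMM1, ZMM2, ZMM3)    // zmm3[i] := rotl32(zmm2[i], zmm1[i])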
+// +// Mnemonic : VPROLVD +// Supported forms : (6 forms) +// +// * VPROLVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPROLVD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPROLVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPROLVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPROLVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPROLVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPROLVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPROLVD", 3, Operands { v0, v1, v2 }) + // VPROLVD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPROLVD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPROLVD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPROLVD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPROLVD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPROLVD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPROLVD") + } + return p +} + +// VPROLVQ performs "Variable Rotate Packed Quadword Left". 
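+//
+// Encoding-wise this is VPROLVD with EVEX.W set: the register forms below
+// emit 0x85/0xfd where the doubleword variant passes 0x05/0x7d, and
+// everything else, including the 0x15 opcode byte, is identical.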
+// +// Mnemonic : VPROLVQ +// Supported forms : (6 forms) +// +// * VPROLVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPROLVQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPROLVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPROLVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPROLVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPROLVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPROLVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPROLVQ", 3, Operands { v0, v1, v2 }) + // VPROLVQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPROLVQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPROLVQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPROLVQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPROLVQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPROLVQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPROLVQ") + } + return p +} + +// VPRORD performs "Rotate Packed Doubleword Right". 
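+//
+// A right rotate by n is equivalent to a left rotate by (32-n) mod 32, so
+// at the encoding level this differs from VPROLD only in the ModRM reg
+// field: /0 here versus /1 there, visible below as the 0xc0-versus-0xc8
+// register byte and the first argument to mrsd.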
+// +// Mnemonic : VPRORD +// Supported forms : (6 forms) +// +// * VPRORD imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VPRORD imm8, zmm, zmm{k}{z} [AVX512F] +// * VPRORD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPRORD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPRORD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPRORD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPRORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPRORD", 3, Operands { v0, v1, v2 }) + // VPRORD imm8, m512/m32bcst, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(0, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x72) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORD imm8, m128/m32bcst, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(0, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORD imm8, m256/m32bcst, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(0, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORD imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x72) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x72) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPRORD") + } + return p +} + +// VPRORQ performs "Rotate Packed Quadword Right". 
+// +// Mnemonic : VPRORQ +// Supported forms : (6 forms) +// +// * VPRORQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPRORQ imm8, zmm, zmm{k}{z} [AVX512F] +// * VPRORQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPRORQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPRORQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPRORQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPRORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPRORQ", 3, Operands { v0, v1, v2 }) + // VPRORQ imm8, m512/m64bcst, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(0, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORQ imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x72) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORQ imm8, m128/m64bcst, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(0, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORQ imm8, m256/m64bcst, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(0, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORQ imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x72) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORQ imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x72) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPRORQ") + } + return p +} + +// VPRORVD performs "Variable Rotate Packed Doubleword Right". 
+// +// Mnemonic : VPRORVD +// Supported forms : (6 forms) +// +// * VPRORVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPRORVD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPRORVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPRORVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPRORVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPRORVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPRORVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPRORVD", 3, Operands { v0, v1, v2 }) + // VPRORVD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPRORVD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPRORVD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPRORVD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPRORVD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPRORVD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPRORVD") + } + return p +} + +// VPRORVQ performs "Variable Rotate Packed Quadword Right". 
+// +// Mnemonic : VPRORVQ +// Supported forms : (6 forms) +// +// * VPRORVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPRORVQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPRORVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPRORVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPRORVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPRORVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPRORVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPRORVQ", 3, Operands { v0, v1, v2 }) + // VPRORVQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPRORVQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPRORVQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPRORVQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPRORVQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPRORVQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPRORVQ") + } + return p +} + +// VPROTB performs "Packed Rotate Bytes". 
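+//
+// This is an AMD XOP instruction rather than an AVX-512 one. In the
+// variable forms each byte's count comes from the corresponding element of
+// the count operand and is treated as signed: positive counts rotate left,
+// negative counts rotate right (and since rotation is modular, the two
+// views agree on the result). This reading follows AMD's XOP reference and
+// is not re-verified here.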
+// +// Mnemonic : VPROTB +// Supported forms : (5 forms) +// +// * VPROTB imm8, xmm, xmm [XOP] +// * VPROTB xmm, xmm, xmm [XOP] +// * VPROTB m128, xmm, xmm [XOP] +// * VPROTB imm8, m128, xmm [XOP] +// * VPROTB xmm, m128, xmm [XOP] +// +func (self *Program) VPROTB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPROTB", 3, Operands { v0, v1, v2 }) + // VPROTB imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78) + m.emit(0xc0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPROTB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x90) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x90) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPROTB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x90) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPROTB imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[2]), addr(v[1]), 0) + m.emit(0xc0) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPROTB xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x90) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPROTB") + } + return p +} + +// VPROTD performs "Packed Rotate Doublewords". 
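+//
+// The four VPROT* variants share their encodings except for the low opcode
+// bits: 0xc0..0xc3 for the immediate forms and 0x90..0x93 for the variable
+// forms, in byte/word/doubleword/quadword order, as the emitted opcode
+// bytes below show.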
+// +// Mnemonic : VPROTD +// Supported forms : (5 forms) +// +// * VPROTD imm8, xmm, xmm [XOP] +// * VPROTD xmm, xmm, xmm [XOP] +// * VPROTD m128, xmm, xmm [XOP] +// * VPROTD imm8, m128, xmm [XOP] +// * VPROTD xmm, m128, xmm [XOP] +// +func (self *Program) VPROTD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPROTD", 3, Operands { v0, v1, v2 }) + // VPROTD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPROTD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x92) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x92) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPROTD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x92) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPROTD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[2]), addr(v[1]), 0) + m.emit(0xc2) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPROTD xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x92) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPROTD") + } + return p +} + +// VPROTQ performs "Packed Rotate Quadwords". 
+// +// Mnemonic : VPROTQ +// Supported forms : (5 forms) +// +// * VPROTQ imm8, xmm, xmm [XOP] +// * VPROTQ xmm, xmm, xmm [XOP] +// * VPROTQ m128, xmm, xmm [XOP] +// * VPROTQ imm8, m128, xmm [XOP] +// * VPROTQ xmm, m128, xmm [XOP] +// +func (self *Program) VPROTQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPROTQ", 3, Operands { v0, v1, v2 }) + // VPROTQ imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78) + m.emit(0xc3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPROTQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x93) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x93) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPROTQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x93) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPROTQ imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[2]), addr(v[1]), 0) + m.emit(0xc3) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPROTQ xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x93) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPROTQ") + } + return p +} + +// VPROTW performs "Packed Rotate Words". 
+// +// Mnemonic : VPROTW +// Supported forms : (5 forms) +// +// * VPROTW imm8, xmm, xmm [XOP] +// * VPROTW xmm, xmm, xmm [XOP] +// * VPROTW m128, xmm, xmm [XOP] +// * VPROTW imm8, m128, xmm [XOP] +// * VPROTW xmm, m128, xmm [XOP] +// +func (self *Program) VPROTW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPROTW", 3, Operands { v0, v1, v2 }) + // VPROTW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78) + m.emit(0xc1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPROTW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x91) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x91) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPROTW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x91) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPROTW imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[2]), addr(v[1]), 0) + m.emit(0xc1) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPROTW xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x91) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPROTW") + } + return p +} + +// VPSADBW performs "Compute Sum of Absolute Differences". 
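+//
+// For each aligned group of eight byte pairs, the unsigned absolute
+// differences are summed into a single 16-bit result stored in the low word
+// of the corresponding 64-bit group, with the remaining words zeroed. One
+// group, sketched in scalar Go:
+//
+//     var sum uint16
+//     for i := 0; i < 8; i++ {
+//         d := int(a[i]) - int(b[i])    // a, b are the byte operands
+//         if d < 0 {
+//             d = -d
+//         }
+//         sum += uint16(d)
+//     }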
+// +// Mnemonic : VPSADBW +// Supported forms : (10 forms) +// +// * VPSADBW xmm, xmm, xmm [AVX] +// * VPSADBW m128, xmm, xmm [AVX] +// * VPSADBW ymm, ymm, ymm [AVX2] +// * VPSADBW m256, ymm, ymm [AVX2] +// * VPSADBW zmm, zmm, zmm [AVX512BW] +// * VPSADBW m512, zmm, zmm [AVX512BW] +// * VPSADBW xmm, xmm, xmm [AVX512BW,AVX512VL] +// * VPSADBW m128, xmm, xmm [AVX512BW,AVX512VL] +// * VPSADBW ymm, ymm, ymm [AVX512BW,AVX512VL] +// * VPSADBW m256, ymm, ymm [AVX512BW,AVX512VL] +// +func (self *Program) VPSADBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSADBW", 3, Operands { v0, v1, v2 }) + // VPSADBW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSADBW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSADBW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSADBW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSADBW zmm, zmm, zmm + if isZMM(v0) && isZMM(v1) && isZMM(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSADBW m512, zmm, zmm + if isM512(v0) && isZMM(v1) && isZMM(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0xf6) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSADBW xmm, xmm, xmm + if isEVEXXMM(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSADBW m128, xmm, xmm + if isM128(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0xf6) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSADBW ymm, ymm, ymm + if isEVEXYMM(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x20) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSADBW m256, ymm, ymm + if isM256(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0xf6) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSADBW") + } + return p +} + +// VPSCATTERDD performs "Scatter Packed Doubleword Values with Signed Doubleword Indices". +// +// Mnemonic : VPSCATTERDD +// Supported forms : (3 forms) +// +// * VPSCATTERDD zmm, vm32z{k} [AVX512F] +// * VPSCATTERDD xmm, vm32x{k} [AVX512F,AVX512VL] +// * VPSCATTERDD ymm, vm32y{k} [AVX512F,AVX512VL] +// +func (self *Program) VPSCATTERDD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPSCATTERDD", 2, Operands { v0, v1 }) + // VPSCATTERDD zmm, vm32z{k} + if isZMM(v0) && isVMZk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa0) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPSCATTERDD xmm, vm32x{k} + if isEVEXXMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa0) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPSCATTERDD ymm, vm32y{k} + if isEVEXYMM(v0) && isVMYk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa0) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPSCATTERDD") + } + return p +} + +// VPSCATTERDQ performs "Scatter Packed Quadword Values with Signed Doubleword Indices". 
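+//
+// As with every AVX-512 scatter, there is no unmasked variant: the operand
+// classes above all require a writemask, and hardware clears each mask bit
+// as the corresponding quadword is stored, which is what makes a scatter
+// interrupted by a page fault restartable.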
+// +// Mnemonic : VPSCATTERDQ +// Supported forms : (3 forms) +// +// * VPSCATTERDQ zmm, vm32y{k} [AVX512F] +// * VPSCATTERDQ xmm, vm32x{k} [AVX512F,AVX512VL] +// * VPSCATTERDQ ymm, vm32x{k} [AVX512F,AVX512VL] +// +func (self *Program) VPSCATTERDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPSCATTERDQ", 2, Operands { v0, v1 }) + // VPSCATTERDQ zmm, vm32y{k} + if isZMM(v0) && isVMYk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa0) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPSCATTERDQ xmm, vm32x{k} + if isEVEXXMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa0) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPSCATTERDQ ymm, vm32x{k} + if isEVEXYMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa0) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPSCATTERDQ") + } + return p +} + +// VPSCATTERQD performs "Scatter Packed Doubleword Values with Signed Quadword Indices". +// +// Mnemonic : VPSCATTERQD +// Supported forms : (3 forms) +// +// * VPSCATTERQD ymm, vm64z{k} [AVX512F] +// * VPSCATTERQD xmm, vm64x{k} [AVX512F,AVX512VL] +// * VPSCATTERQD xmm, vm64y{k} [AVX512F,AVX512VL] +// +func (self *Program) VPSCATTERQD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPSCATTERQD", 2, Operands { v0, v1 }) + // VPSCATTERQD ymm, vm64z{k} + if isEVEXYMM(v0) && isVMZk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa1) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPSCATTERQD xmm, vm64x{k} + if isEVEXXMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa1) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPSCATTERQD xmm, vm64y{k} + if isEVEXXMM(v0) && isVMYk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa1) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPSCATTERQD") + } + return p +} + +// VPSCATTERQQ performs "Scatter Packed Quadword Values with Signed Quadword Indices". 
+// +// Mnemonic : VPSCATTERQQ +// Supported forms : (3 forms) +// +// * VPSCATTERQQ zmm, vm64z{k} [AVX512F] +// * VPSCATTERQQ xmm, vm64x{k} [AVX512F,AVX512VL] +// * VPSCATTERQQ ymm, vm64y{k} [AVX512F,AVX512VL] +// +func (self *Program) VPSCATTERQQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPSCATTERQQ", 2, Operands { v0, v1 }) + // VPSCATTERQQ zmm, vm64z{k} + if isZMM(v0) && isVMZk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa1) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPSCATTERQQ xmm, vm64x{k} + if isEVEXXMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa1) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPSCATTERQQ ymm, vm64y{k} + if isEVEXYMM(v0) && isVMYk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa1) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPSCATTERQQ") + } + return p +} + +// VPSHAB performs "Packed Shift Arithmetic Bytes". +// +// Mnemonic : VPSHAB +// Supported forms : (3 forms) +// +// * VPSHAB xmm, xmm, xmm [XOP] +// * VPSHAB m128, xmm, xmm [XOP] +// * VPSHAB xmm, m128, xmm [XOP] +// +func (self *Program) VPSHAB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHAB", 3, Operands { v0, v1, v2 }) + // VPSHAB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHAB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHAB xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHAB") + } + return p +} + +// VPSHAD performs "Packed Shift Arithmetic Doublewords". 
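+//
+// The shift count is per element and signed: a positive count shifts left,
+// a negative count shifts arithmetically right, filling with the sign bit
+// (per AMD's XOP reference; the exact count-clamping behaviour is not
+// re-verified here). One doubleword lane, roughly, with x an int32 and n
+// the sign-extended count:
+//
+//     if n >= 0 { r = x << uint(n) } else { r = x >> uint(-n) }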
+// +// Mnemonic : VPSHAD +// Supported forms : (3 forms) +// +// * VPSHAD xmm, xmm, xmm [XOP] +// * VPSHAD m128, xmm, xmm [XOP] +// * VPSHAD xmm, m128, xmm [XOP] +// +func (self *Program) VPSHAD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHAD", 3, Operands { v0, v1, v2 }) + // VPSHAD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHAD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHAD xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHAD") + } + return p +} + +// VPSHAQ performs "Packed Shift Arithmetic Quadwords". +// +// Mnemonic : VPSHAQ +// Supported forms : (3 forms) +// +// * VPSHAQ xmm, xmm, xmm [XOP] +// * VPSHAQ m128, xmm, xmm [XOP] +// * VPSHAQ xmm, m128, xmm [XOP] +// +func (self *Program) VPSHAQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHAQ", 3, Operands { v0, v1, v2 }) + // VPSHAQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHAQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHAQ xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x9b) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHAQ") + } + return p +} + +// VPSHAW performs "Packed Shift Arithmetic Words". 
+// +// Mnemonic : VPSHAW +// Supported forms : (3 forms) +// +// * VPSHAW xmm, xmm, xmm [XOP] +// * VPSHAW m128, xmm, xmm [XOP] +// * VPSHAW xmm, m128, xmm [XOP] +// +func (self *Program) VPSHAW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHAW", 3, Operands { v0, v1, v2 }) + // VPSHAW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x99) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x99) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHAW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x99) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHAW xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x99) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHAW") + } + return p +} + +// VPSHLB performs "Packed Shift Logical Bytes". +// +// Mnemonic : VPSHLB +// Supported forms : (3 forms) +// +// * VPSHLB xmm, xmm, xmm [XOP] +// * VPSHLB m128, xmm, xmm [XOP] +// * VPSHLB xmm, m128, xmm [XOP] +// +func (self *Program) VPSHLB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHLB", 3, Operands { v0, v1, v2 }) + // VPSHLB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x94) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x94) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHLB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x94) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHLB xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x94) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHLB") + } + return p +} + +// VPSHLD performs "Packed Shift Logical Doublewords". 
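+//
+// This is the logical counterpart of VPSHAD above: counts are again signed
+// and per element (positive = left, negative = right), but right shifts
+// fill with zeros rather than the sign bit.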
+// +// Mnemonic : VPSHLD +// Supported forms : (3 forms) +// +// * VPSHLD xmm, xmm, xmm [XOP] +// * VPSHLD m128, xmm, xmm [XOP] +// * VPSHLD xmm, m128, xmm [XOP] +// +func (self *Program) VPSHLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHLD", 3, Operands { v0, v1, v2 }) + // VPSHLD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHLD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHLD xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHLD") + } + return p +} + +// VPSHLQ performs "Packed Shift Logical Quadwords". +// +// Mnemonic : VPSHLQ +// Supported forms : (3 forms) +// +// * VPSHLQ xmm, xmm, xmm [XOP] +// * VPSHLQ m128, xmm, xmm [XOP] +// * VPSHLQ xmm, m128, xmm [XOP] +// +func (self *Program) VPSHLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHLQ", 3, Operands { v0, v1, v2 }) + // VPSHLQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHLQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHLQ xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHLQ") + } + return p +} + +// VPSHLW performs "Packed Shift Logical Words". 
+// +// Mnemonic : VPSHLW +// Supported forms : (3 forms) +// +// * VPSHLW xmm, xmm, xmm [XOP] +// * VPSHLW m128, xmm, xmm [XOP] +// * VPSHLW xmm, m128, xmm [XOP] +// +func (self *Program) VPSHLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHLW", 3, Operands { v0, v1, v2 }) + // VPSHLW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x95) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x95) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHLW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x95) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHLW xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x95) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHLW") + } + return p +} + +// VPSHUFB performs "Packed Shuffle Bytes". +// +// Mnemonic : VPSHUFB +// Supported forms : (10 forms) +// +// * VPSHUFB xmm, xmm, xmm [AVX] +// * VPSHUFB m128, xmm, xmm [AVX] +// * VPSHUFB ymm, ymm, ymm [AVX2] +// * VPSHUFB m256, ymm, ymm [AVX2] +// * VPSHUFB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSHUFB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSHUFB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSHUFB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHUFB", 3, Operands { v0, v1, v2 }) + // VPSHUFB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x00) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHUFB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x00) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHUFB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x00) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHUFB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x00) + m.mrsd(lcode(v[2]), 
addr(v[0]), 1) + }) + } + // VPSHUFB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x00) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHUFB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x00) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSHUFB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x00) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHUFB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x00) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSHUFB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x00) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHUFB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x00) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHUFB") + } + return p +} + +// VPSHUFD performs "Shuffle Packed Doublewords". 
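+//
+// The immediate is four 2-bit selectors: destination doubleword i receives
+// the source doubleword indexed by bits [2i+1:2i] of imm8, and the 256- and
+// 512-bit forms apply the shuffle independently within each 128-bit lane.
+// Per lane, in scalar terms:
+//
+//     dst[i] = src[(imm>>(2*i))&3]    // i = 0..3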
+// +// Mnemonic : VPSHUFD +// Supported forms : (10 forms) +// +// * VPSHUFD imm8, xmm, xmm [AVX] +// * VPSHUFD imm8, m128, xmm [AVX] +// * VPSHUFD imm8, ymm, ymm [AVX2] +// * VPSHUFD imm8, m256, ymm [AVX2] +// * VPSHUFD imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VPSHUFD imm8, zmm, zmm{k}{z} [AVX512F] +// * VPSHUFD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSHUFD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSHUFD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSHUFD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSHUFD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHUFD", 3, Operands { v0, v1, v2 }) + // VPSHUFD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[1], 0) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[1]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[1], 0) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[1]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, m512/m32bcst, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, m128/m32bcst, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, m256/m32bcst, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, xmm, xmm{k}{z} + if isImm8(v0) && 
isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHUFD") + } + return p +} + +// VPSHUFHW performs "Shuffle Packed High Words". +// +// Mnemonic : VPSHUFHW +// Supported forms : (10 forms) +// +// * VPSHUFHW imm8, xmm, xmm [AVX] +// * VPSHUFHW imm8, m128, xmm [AVX] +// * VPSHUFHW imm8, ymm, ymm [AVX2] +// * VPSHUFHW imm8, m256, ymm [AVX2] +// * VPSHUFHW imm8, zmm, zmm{k}{z} [AVX512BW] +// * VPSHUFHW imm8, m512, zmm{k}{z} [AVX512BW] +// * VPSHUFHW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFHW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFHW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFHW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSHUFHW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHUFHW", 3, Operands { v0, v1, v2 }) + // VPSHUFHW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[1], 0) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[1]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[2]), v[1], 0) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[2]), addr(v[1]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, m512, zmm{k}{z} + if isImm8(v0) && isM512(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b10, ehcode(v[2]), addr(v[1]), 
0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, m128, xmm{k}{z} + if isImm8(v0) && isM128(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, m256, ymm{k}{z} + if isImm8(v0) && isM256(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHUFHW") + } + return p +} + +// VPSHUFLW performs "Shuffle Packed Low Words". 
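+//
+// A usage sketch, not from the generated source (XMM register names assumed
+// from this package): copy word 0 of XMM1 into all four low word lanes of
+// XMM0, with the high quadword passing through unchanged:
+//
+//     p.VPSHUFLW(0x00, XMM1, XMM0)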
+// +// Mnemonic : VPSHUFLW +// Supported forms : (10 forms) +// +// * VPSHUFLW imm8, xmm, xmm [AVX] +// * VPSHUFLW imm8, m128, xmm [AVX] +// * VPSHUFLW imm8, ymm, ymm [AVX2] +// * VPSHUFLW imm8, m256, ymm [AVX2] +// * VPSHUFLW imm8, zmm, zmm{k}{z} [AVX512BW] +// * VPSHUFLW imm8, m512, zmm{k}{z} [AVX512BW] +// * VPSHUFLW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFLW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFLW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFLW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSHUFLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHUFLW", 3, Operands { v0, v1, v2 }) + // VPSHUFLW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[1], 0) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[1]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[2]), v[1], 0) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[2]), addr(v[1]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, m512, zmm{k}{z} + if isImm8(v0) && isM512(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | 
lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, m128, xmm{k}{z} + if isImm8(v0) && isM128(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, m256, ymm{k}{z} + if isImm8(v0) && isM256(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHUFLW") + } + return p +} + +// VPSIGNB performs "Packed Sign of Byte Integers". +// +// Mnemonic : VPSIGNB +// Supported forms : (4 forms) +// +// * VPSIGNB xmm, xmm, xmm [AVX] +// * VPSIGNB m128, xmm, xmm [AVX] +// * VPSIGNB ymm, ymm, ymm [AVX2] +// * VPSIGNB m256, ymm, ymm [AVX2] +// +func (self *Program) VPSIGNB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSIGNB", 3, Operands { v0, v1, v2 }) + // VPSIGNB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x08) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSIGNB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x08) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSIGNB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x08) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSIGNB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x08) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSIGNB") + } + return p +} + +// VPSIGND performs "Packed Sign of Doubleword Integers". 
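+//
+// A usage sketch (operands are illustrative): negate, zero out, or keep each
+// doubleword of XMM1 according to whether the matching doubleword of XMM2 is
+// negative, zero, or positive, writing the result to XMM0. Operands follow
+// the order in the form list below (sources first, destination last):
+//
+//     p.VPSIGND(XMM2, XMM1, XMM0)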
+// +// Mnemonic : VPSIGND +// Supported forms : (4 forms) +// +// * VPSIGND xmm, xmm, xmm [AVX] +// * VPSIGND m128, xmm, xmm [AVX] +// * VPSIGND ymm, ymm, ymm [AVX2] +// * VPSIGND m256, ymm, ymm [AVX2] +// +func (self *Program) VPSIGND(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSIGND", 3, Operands { v0, v1, v2 }) + // VPSIGND xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSIGND m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x0a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSIGND ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSIGND m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x0a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSIGND") + } + return p +} + +// VPSIGNW performs "Packed Sign of Word Integers". +// +// Mnemonic : VPSIGNW +// Supported forms : (4 forms) +// +// * VPSIGNW xmm, xmm, xmm [AVX] +// * VPSIGNW m128, xmm, xmm [AVX] +// * VPSIGNW ymm, ymm, ymm [AVX2] +// * VPSIGNW m256, ymm, ymm [AVX2] +// +func (self *Program) VPSIGNW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSIGNW", 3, Operands { v0, v1, v2 }) + // VPSIGNW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x09) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSIGNW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x09) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSIGNW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x09) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSIGNW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x09) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSIGNW") + } + return p +} + +// VPSLLD performs "Shift Packed Doubleword Data Left 
Logical". +// +// Mnemonic : VPSLLD +// Supported forms : (18 forms) +// +// * VPSLLD imm8, xmm, xmm [AVX] +// * VPSLLD xmm, xmm, xmm [AVX] +// * VPSLLD m128, xmm, xmm [AVX] +// * VPSLLD imm8, ymm, ymm [AVX2] +// * VPSLLD xmm, ymm, ymm [AVX2] +// * VPSLLD m128, ymm, ymm [AVX2] +// * VPSLLD imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VPSLLD imm8, zmm, zmm{k}{z} [AVX512F] +// * VPSLLD xmm, zmm, zmm{k}{z} [AVX512F] +// * VPSLLD m128, zmm, zmm{k}{z} [AVX512F] +// * VPSLLD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSLLD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLD m128, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSLLD xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSLLD m128, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSLLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSLLD", 3, Operands { v0, v1, v2 }) + // VPSLLD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x72) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf2) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLD imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x72) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLD xmm, ymm, ymm + if isXMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLD m128, ymm, ymm + if isM128(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf2) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLD imm8, m512/m32bcst, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(6, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x72) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLD xmm, zmm, 
zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLD m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLD imm8, m128/m32bcst, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(6, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLD imm8, m256/m32bcst, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(6, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLD imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x72) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLD m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x72) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLD xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLD m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPSLLD") + } + return p +} + +// VPSLLDQ performs "Shift Packed Double Quadword Left Logical". +// +// Mnemonic : VPSLLDQ +// Supported forms : (8 forms) +// +// * VPSLLDQ imm8, xmm, xmm [AVX] +// * VPSLLDQ imm8, ymm, ymm [AVX2] +// * VPSLLDQ imm8, zmm, zmm [AVX512BW] +// * VPSLLDQ imm8, m512, zmm [AVX512BW] +// * VPSLLDQ imm8, xmm, xmm [AVX512BW,AVX512VL] +// * VPSLLDQ imm8, m128, xmm [AVX512BW,AVX512VL] +// * VPSLLDQ imm8, ymm, ymm [AVX512BW,AVX512VL] +// * VPSLLDQ imm8, m256, ymm [AVX512BW,AVX512VL] +// +func (self *Program) VPSLLDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSLLDQ", 3, Operands { v0, v1, v2 }) + // VPSLLDQ imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x73) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLDQ imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x73) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLDQ imm8, zmm, zmm + if isImm8(v0) && isZMM(v1) && isZMM(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x40) + m.emit(0x73) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLDQ imm8, m512, zmm + if isImm8(v0) && isM512(v1) && isZMM(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x73) + m.mrsd(7, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLDQ imm8, xmm, xmm + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00) + m.emit(0x73) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLDQ imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x73) + m.mrsd(7, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLDQ imm8, ymm, ymm + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ 
(hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x20) + m.emit(0x73) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLDQ imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x73) + m.mrsd(7, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSLLDQ") + } + return p +} + +// VPSLLQ performs "Shift Packed Quadword Data Left Logical". +// +// Mnemonic : VPSLLQ +// Supported forms : (18 forms) +// +// * VPSLLQ imm8, xmm, xmm [AVX] +// * VPSLLQ xmm, xmm, xmm [AVX] +// * VPSLLQ m128, xmm, xmm [AVX] +// * VPSLLQ imm8, ymm, ymm [AVX2] +// * VPSLLQ xmm, ymm, ymm [AVX2] +// * VPSLLQ m128, ymm, ymm [AVX2] +// * VPSLLQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPSLLQ imm8, zmm, zmm{k}{z} [AVX512F] +// * VPSLLQ xmm, zmm, zmm{k}{z} [AVX512F] +// * VPSLLQ m128, zmm, zmm{k}{z} [AVX512F] +// * VPSLLQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSLLQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLQ m128, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSLLQ xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSLLQ m128, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSLLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSLLQ", 3, Operands { v0, v1, v2 }) + // VPSLLQ imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x73) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf3) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLQ imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x73) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLQ xmm, ymm, ymm + if isXMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLQ m128, ymm, ymm + if isM128(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf3) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLQ imm8, m512/m64bcst, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x73) + m.mrsd(6, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLQ imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x73) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLQ xmm, zmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xf3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLQ m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf3) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLQ imm8, m128/m64bcst, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x73) + m.mrsd(6, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLQ imm8, m256/m64bcst, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x73) + m.mrsd(6, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLQ imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x73) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xf3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLQ m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf3) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLQ imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + 
self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x73) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLQ xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xf3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLQ m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf3) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPSLLQ") + } + return p +} + +// VPSLLVD performs "Variable Shift Packed Doubleword Data Left Logical". +// +// Mnemonic : VPSLLVD +// Supported forms : (10 forms) +// +// * VPSLLVD xmm, xmm, xmm [AVX2] +// * VPSLLVD m128, xmm, xmm [AVX2] +// * VPSLLVD ymm, ymm, ymm [AVX2] +// * VPSLLVD m256, ymm, ymm [AVX2] +// * VPSLLVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPSLLVD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPSLLVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSLLVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSLLVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSLLVD", 3, Operands { v0, v1, v2 }) + // VPSLLVD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLVD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLVD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 
ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSLLVD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLVD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPSLLVD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSLLVD") + } + return p +} + +// VPSLLVQ performs "Variable Shift Packed Quadword Data Left Logical". 
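+//
+// A usage sketch (YMM register names assumed from this package): shift each
+// quadword of YMM1 left by the count held in the matching quadword of YMM2,
+// writing the result to YMM0 (per-element counts first, then data, then
+// destination):
+//
+//     p.VPSLLVQ(YMM2, YMM1, YMM0)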
+// +// Mnemonic : VPSLLVQ +// Supported forms : (10 forms) +// +// * VPSLLVQ xmm, xmm, xmm [AVX2] +// * VPSLLVQ m128, xmm, xmm [AVX2] +// * VPSLLVQ ymm, ymm, ymm [AVX2] +// * VPSLLVQ m256, ymm, ymm [AVX2] +// * VPSLLVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPSLLVQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPSLLVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSLLVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSLLVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSLLVQ", 3, Operands { v0, v1, v2 }) + // VPSLLVQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLVQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLVQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSLLVQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLVQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ 
(ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPSLLVQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSLLVQ") + } + return p +} + +// VPSLLVW performs "Variable Shift Packed Word Data Left Logical". +// +// Mnemonic : VPSLLVW +// Supported forms : (6 forms) +// +// * VPSLLVW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSLLVW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSLLVW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLVW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLVW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLVW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSLLVW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSLLVW", 3, Operands { v0, v1, v2 }) + // VPSLLVW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x12) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x12) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSLLVW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x12) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x12) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLVW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | 
(ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x12) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x12) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSLLVW") + } + return p +} + +// VPSLLW performs "Shift Packed Word Data Left Logical". +// +// Mnemonic : VPSLLW +// Supported forms : (18 forms) +// +// * VPSLLW imm8, xmm, xmm [AVX] +// * VPSLLW xmm, xmm, xmm [AVX] +// * VPSLLW m128, xmm, xmm [AVX] +// * VPSLLW imm8, ymm, ymm [AVX2] +// * VPSLLW xmm, ymm, ymm [AVX2] +// * VPSLLW m128, ymm, ymm [AVX2] +// * VPSLLW imm8, zmm, zmm{k}{z} [AVX512BW] +// * VPSLLW xmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSLLW m128, zmm, zmm{k}{z} [AVX512BW] +// * VPSLLW imm8, m512, zmm{k}{z} [AVX512BW] +// * VPSLLW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLW xmm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLW m128, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSLLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSLLW", 3, Operands { v0, v1, v2 }) + // VPSLLW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x71) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf1) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLW imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x71) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLW xmm, ymm, ymm + if isXMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLW m128, ymm, ymm + if isM128(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf1) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLW imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + 
p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x71) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLW xmm, zmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLW m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLW imm8, m512, zmm{k}{z} + if isImm8(v0) && isM512(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(6, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLW imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x71) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLW imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x71) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLW xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) 
<< 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLW m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLW imm8, m128, xmm{k}{z} + if isImm8(v0) && isM128(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(6, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLW imm8, m256, ymm{k}{z} + if isImm8(v0) && isM256(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(6, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSLLW") + } + return p +} + +// VPSRAD performs "Shift Packed Doubleword Data Right Arithmetic". +// +// Mnemonic : VPSRAD +// Supported forms : (18 forms) +// +// * VPSRAD imm8, xmm, xmm [AVX] +// * VPSRAD xmm, xmm, xmm [AVX] +// * VPSRAD m128, xmm, xmm [AVX] +// * VPSRAD imm8, ymm, ymm [AVX2] +// * VPSRAD xmm, ymm, ymm [AVX2] +// * VPSRAD m128, ymm, ymm [AVX2] +// * VPSRAD imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VPSRAD imm8, zmm, zmm{k}{z} [AVX512F] +// * VPSRAD xmm, zmm, zmm{k}{z} [AVX512F] +// * VPSRAD m128, zmm, zmm{k}{z} [AVX512F] +// * VPSRAD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRAD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAD m128, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRAD xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRAD m128, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSRAD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRAD", 3, Operands { v0, v1, v2 }) + // VPSRAD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe2) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRAD imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAD xmm, ymm, ymm + if isXMM(v0) 
&& isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAD m128, ymm, ymm + if isM128(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe2) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRAD imm8, m512/m32bcst, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(4, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAD xmm, zmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAD m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAD imm8, m128/m32bcst, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(4, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAD imm8, m256/m32bcst, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(4, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAD imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 
^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAD m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAD xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAD m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRAD") + } + return p +} + +// VPSRAQ performs "Shift Packed Quadword Data Right Arithmetic". 
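+//
+// A minimal usage sketch (hypothetical, not taken from this package's docs;
+// it assumes a *Program p and the package's ZMM register constants, and
+// relies on the destination being the last operand, as the forms below show):
+//
+//     p.VPSRAQ(7, ZMM1, ZMM0)    // zmm0 = zmm1 >> 7 (arithmetic, per quadword)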
+// +// Mnemonic : VPSRAQ +// Supported forms : (12 forms) +// +// * VPSRAQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPSRAQ imm8, zmm, zmm{k}{z} [AVX512F] +// * VPSRAQ xmm, zmm, zmm{k}{z} [AVX512F] +// * VPSRAQ m128, zmm, zmm{k}{z} [AVX512F] +// * VPSRAQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRAQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAQ m128, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRAQ xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRAQ m128, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSRAQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRAQ", 3, Operands { v0, v1, v2 }) + // VPSRAQ imm8, m512/m64bcst, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(4, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAQ imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAQ xmm, zmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAQ m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAQ imm8, m128/m64bcst, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(4, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAQ imm8, m256/m64bcst, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(4, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAQ imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | 
kcode(v[2]) | 0x00) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAQ m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAQ imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAQ xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAQ m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRAQ") + } + return p +} + +// VPSRAVD performs "Variable Shift Packed Doubleword Data Right Arithmetic". 
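+//
+// A minimal usage sketch (hypothetical; assumes a *Program p and the YMM
+// register constants — the first operand supplies a per-lane shift count,
+// the second the source, and the last the destination, matching the forms below):
+//
+//     p.VPSRAVD(YMM2, YMM1, YMM0)    // ymm0[i] = ymm1[i] >> ymm2[i] (arithmetic)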
+// +// Mnemonic : VPSRAVD +// Supported forms : (10 forms) +// +// * VPSRAVD xmm, xmm, xmm [AVX2] +// * VPSRAVD m128, xmm, xmm [AVX2] +// * VPSRAVD ymm, ymm, ymm [AVX2] +// * VPSRAVD m256, ymm, ymm [AVX2] +// * VPSRAVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPSRAVD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPSRAVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRAVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSRAVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRAVD", 3, Operands { v0, v1, v2 }) + // VPSRAVD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x46) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRAVD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x46) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRAVD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x46) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSRAVD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x46) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAVD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ 
(ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x46) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPSRAVD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRAVD") + } + return p +} + +// VPSRAVQ performs "Variable Shift Packed Quadword Data Right Arithmetic". +// +// Mnemonic : VPSRAVQ +// Supported forms : (6 forms) +// +// * VPSRAVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPSRAVQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPSRAVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRAVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSRAVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRAVQ", 3, Operands { v0, v1, v2 }) + // VPSRAVQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x46) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSRAVQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x46) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAVQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m 
*_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x46) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPSRAVQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRAVQ") + } + return p +} + +// VPSRAVW performs "Variable Shift Packed Word Data Right Arithmetic". +// +// Mnemonic : VPSRAVW +// Supported forms : (6 forms) +// +// * VPSRAVW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSRAVW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSRAVW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAVW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAVW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAVW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSRAVW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRAVW", 3, Operands { v0, v1, v2 }) + // VPSRAVW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x11) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x11) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSRAVW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x11) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x11) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAVW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x11) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVW m256, ymm, ymm{k}{z} + if isM256(v0) && 
isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x11) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRAVW") + } + return p +} + +// VPSRAW performs "Shift Packed Word Data Right Arithmetic". +// +// Mnemonic : VPSRAW +// Supported forms : (18 forms) +// +// * VPSRAW imm8, xmm, xmm [AVX] +// * VPSRAW xmm, xmm, xmm [AVX] +// * VPSRAW m128, xmm, xmm [AVX] +// * VPSRAW imm8, ymm, ymm [AVX2] +// * VPSRAW xmm, ymm, ymm [AVX2] +// * VPSRAW m128, ymm, ymm [AVX2] +// * VPSRAW imm8, zmm, zmm{k}{z} [AVX512BW] +// * VPSRAW xmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSRAW m128, zmm, zmm{k}{z} [AVX512BW] +// * VPSRAW imm8, m512, zmm{k}{z} [AVX512BW] +// * VPSRAW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAW xmm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAW m128, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSRAW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRAW", 3, Operands { v0, v1, v2 }) + // VPSRAW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x71) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe1) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRAW imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x71) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAW xmm, ymm, ymm + if isXMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAW m128, ymm, ymm + if isM128(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe1) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRAW imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x71) + m.emit(0xe0 | lcode(v[1])) 
+ m.imm1(toImmAny(v[0])) + }) + } + // VPSRAW xmm, zmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAW m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAW imm8, m512, zmm{k}{z} + if isImm8(v0) && isM512(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(4, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAW imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x71) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAW imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x71) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAW xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAW m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + 
p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAW imm8, m128, xmm{k}{z} + if isImm8(v0) && isM128(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(4, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAW imm8, m256, ymm{k}{z} + if isImm8(v0) && isM256(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(4, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRAW") + } + return p +} + +// VPSRLD performs "Shift Packed Doubleword Data Right Logical". +// +// Mnemonic : VPSRLD +// Supported forms : (18 forms) +// +// * VPSRLD imm8, xmm, xmm [AVX] +// * VPSRLD xmm, xmm, xmm [AVX] +// * VPSRLD m128, xmm, xmm [AVX] +// * VPSRLD imm8, ymm, ymm [AVX2] +// * VPSRLD xmm, ymm, ymm [AVX2] +// * VPSRLD m128, ymm, ymm [AVX2] +// * VPSRLD imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VPSRLD imm8, zmm, zmm{k}{z} [AVX512F] +// * VPSRLD xmm, zmm, zmm{k}{z} [AVX512F] +// * VPSRLD m128, zmm, zmm{k}{z} [AVX512F] +// * VPSRLD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRLD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLD m128, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRLD xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRLD m128, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSRLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRLD", 3, Operands { v0, v1, v2 }) + // VPSRLD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x72) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd2) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLD imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x72) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLD xmm, ymm, ymm + if isXMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLD m128, ymm, ymm + if isM128(v0) 
&& isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd2) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLD imm8, m512/m32bcst, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(2, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x72) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLD xmm, zmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLD m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLD imm8, m128/m32bcst, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(2, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLD imm8, m256/m32bcst, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(2, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLD imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x72) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLD m128, xmm, 
xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x72) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLD xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLD m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRLD") + } + return p +} + +// VPSRLDQ performs "Shift Packed Double Quadword Right Logical". +// +// Mnemonic : VPSRLDQ +// Supported forms : (8 forms) +// +// * VPSRLDQ imm8, xmm, xmm [AVX] +// * VPSRLDQ imm8, ymm, ymm [AVX2] +// * VPSRLDQ imm8, zmm, zmm [AVX512BW] +// * VPSRLDQ imm8, m512, zmm [AVX512BW] +// * VPSRLDQ imm8, xmm, xmm [AVX512BW,AVX512VL] +// * VPSRLDQ imm8, m128, xmm [AVX512BW,AVX512VL] +// * VPSRLDQ imm8, ymm, ymm [AVX512BW,AVX512VL] +// * VPSRLDQ imm8, m256, ymm [AVX512BW,AVX512VL] +// +func (self *Program) VPSRLDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRLDQ", 3, Operands { v0, v1, v2 }) + // VPSRLDQ imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x73) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLDQ imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x73) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLDQ imm8, zmm, zmm + if isImm8(v0) && isZMM(v1) && isZMM(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x40) + m.emit(0x73) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLDQ imm8, m512, zmm + if isImm8(v0) && isM512(v1) && isZMM(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x73) + m.mrsd(3, addr(v[1]), 
64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLDQ imm8, xmm, xmm + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00) + m.emit(0x73) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLDQ imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x73) + m.mrsd(3, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLDQ imm8, ymm, ymm + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x20) + m.emit(0x73) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLDQ imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x73) + m.mrsd(3, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRLDQ") + } + return p +} + +// VPSRLQ performs "Shift Packed Quadword Data Right Logical". +// +// Mnemonic : VPSRLQ +// Supported forms : (18 forms) +// +// * VPSRLQ imm8, xmm, xmm [AVX] +// * VPSRLQ xmm, xmm, xmm [AVX] +// * VPSRLQ m128, xmm, xmm [AVX] +// * VPSRLQ imm8, ymm, ymm [AVX2] +// * VPSRLQ xmm, ymm, ymm [AVX2] +// * VPSRLQ m128, ymm, ymm [AVX2] +// * VPSRLQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPSRLQ imm8, zmm, zmm{k}{z} [AVX512F] +// * VPSRLQ xmm, zmm, zmm{k}{z} [AVX512F] +// * VPSRLQ m128, zmm, zmm{k}{z} [AVX512F] +// * VPSRLQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRLQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLQ m128, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRLQ xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRLQ m128, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSRLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRLQ", 3, Operands { v0, v1, v2 }) + // VPSRLQ imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x73) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd3) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + 
// VPSRLQ imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x73) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLQ xmm, ymm, ymm + if isXMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLQ m128, ymm, ymm + if isM128(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd3) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLQ imm8, m512/m64bcst, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x73) + m.mrsd(2, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLQ imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x73) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLQ xmm, zmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLQ m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd3) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLQ imm8, m128/m64bcst, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x73) + m.mrsd(2, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLQ imm8, m256/m64bcst, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x73) + m.mrsd(2, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLQ imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | 
kcode(v[2]) | 0x00) + m.emit(0x73) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLQ m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd3) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLQ imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x73) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLQ xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLQ m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd3) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRLQ") + } + return p +} + +// VPSRLVD performs "Variable Shift Packed Doubleword Data Right Logical". 
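+//
+// A minimal usage sketch (hypothetical; same operand convention as the
+// arithmetic variant VPSRAVD above — per-lane counts first, destination last):
+//
+//     p.VPSRLVD(YMM2, YMM1, YMM0)    // ymm0[i] = ymm1[i] >> ymm2[i] (logical)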
+// +// Mnemonic : VPSRLVD +// Supported forms : (10 forms) +// +// * VPSRLVD xmm, xmm, xmm [AVX2] +// * VPSRLVD m128, xmm, xmm [AVX2] +// * VPSRLVD ymm, ymm, ymm [AVX2] +// * VPSRLVD m256, ymm, ymm [AVX2] +// * VPSRLVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPSRLVD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPSRLVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRLVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSRLVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRLVD", 3, Operands { v0, v1, v2 }) + // VPSRLVD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLVD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLVD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSRLVD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLVD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ 
(ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPSRLVD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRLVD") + } + return p +} + +// VPSRLVQ performs "Variable Shift Packed Quadword Data Right Logical". +// +// Mnemonic : VPSRLVQ +// Supported forms : (10 forms) +// +// * VPSRLVQ xmm, xmm, xmm [AVX2] +// * VPSRLVQ m128, xmm, xmm [AVX2] +// * VPSRLVQ ymm, ymm, ymm [AVX2] +// * VPSRLVQ m256, ymm, ymm [AVX2] +// * VPSRLVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPSRLVQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPSRLVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRLVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSRLVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRLVQ", 3, Operands { v0, v1, v2 }) + // VPSRLVQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLVQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLVQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSRLVQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + 
self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLVQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPSRLVQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRLVQ") + } + return p +} + +// VPSRLVW performs "Variable Shift Packed Word Data Right Logical". 
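+//
+// A minimal usage sketch (hypothetical; assumes a *Program p and the XMM
+// register constants — the 128-bit form below requires AVX512VL plus AVX512BW):
+//
+//     p.VPSRLVW(XMM2, XMM1, XMM0)    // xmm0[i] = xmm1[i] >> xmm2[i] (logical, per word)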
+// +// Mnemonic : VPSRLVW +// Supported forms : (6 forms) +// +// * VPSRLVW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSRLVW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSRLVW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLVW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLVW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLVW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSRLVW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRLVW", 3, Operands { v0, v1, v2 }) + // VPSRLVW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x10) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSRLVW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x10) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLVW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x10) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRLVW") + } + return p +} + +// VPSRLW performs "Shift Packed Word Data Right Logical". 
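+//
+// A minimal usage sketch (hypothetical; assumes a *Program p and the XMM
+// register constants — an untyped integer constant serves as the imm8,
+// with the destination last, as in the forms below):
+//
+//     p.VPSRLW(3, XMM1, XMM0)    // xmm0 = xmm1 >> 3 (logical, per word)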
+// +// Mnemonic : VPSRLW +// Supported forms : (18 forms) +// +// * VPSRLW imm8, xmm, xmm [AVX] +// * VPSRLW xmm, xmm, xmm [AVX] +// * VPSRLW m128, xmm, xmm [AVX] +// * VPSRLW imm8, ymm, ymm [AVX2] +// * VPSRLW xmm, ymm, ymm [AVX2] +// * VPSRLW m128, ymm, ymm [AVX2] +// * VPSRLW imm8, zmm, zmm{k}{z} [AVX512BW] +// * VPSRLW xmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSRLW m128, zmm, zmm{k}{z} [AVX512BW] +// * VPSRLW imm8, m512, zmm{k}{z} [AVX512BW] +// * VPSRLW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLW xmm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLW m128, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSRLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRLW", 3, Operands { v0, v1, v2 }) + // VPSRLW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x71) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd1) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLW imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x71) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLW xmm, ymm, ymm + if isXMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLW m128, ymm, ymm + if isM128(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd1) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLW imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x71) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLW xmm, zmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xd1) 
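+ // ModRM byte below: mod=11 (register direct), reg field carries the destination zmm, r/m the xmm shift-count register.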
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLW m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLW imm8, m512, zmm{k}{z} + if isImm8(v0) && isM512(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(2, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLW imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x71) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLW imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x71) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLW xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLW m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLW imm8, m128, xmm{k}{z} + if isImm8(v0) && isM128(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), 
vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(2, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLW imm8, m256, ymm{k}{z} + if isImm8(v0) && isM256(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(2, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRLW") + } + return p +} + +// VPSUBB performs "Subtract Packed Byte Integers". +// +// Mnemonic : VPSUBB +// Supported forms : (10 forms) +// +// * VPSUBB xmm, xmm, xmm [AVX] +// * VPSUBB m128, xmm, xmm [AVX] +// * VPSUBB ymm, ymm, ymm [AVX2] +// * VPSUBB m256, ymm, ymm [AVX2] +// * VPSUBB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSUBB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSUBB", 3, Operands { v0, v1, v2 }) + // VPSUBB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xf8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf8) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSUBB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 
^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xf8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf8) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSUBB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xf8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf8) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSUBB") + } + return p +} + +// VPSUBD performs "Subtract Packed Doubleword Integers". +// +// Mnemonic : VPSUBD +// Supported forms : (10 forms) +// +// * VPSUBD xmm, xmm, xmm [AVX] +// * VPSUBD m128, xmm, xmm [AVX] +// * VPSUBD ymm, ymm, ymm [AVX2] +// * VPSUBD m256, ymm, ymm [AVX2] +// * VPSUBD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPSUBD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPSUBD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSUBD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSUBD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSUBD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSUBD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSUBD", 3, Operands { v0, v1, v2 }) + // VPSUBD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfa) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfa) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + 
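+ // 0xFA is the PSUBD opcode byte; the memory operand below uses EVEX disp8*64 compression for the full 512-bit tuple.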
m.emit(0xfa) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSUBD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xfa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xfa) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSUBD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xfa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xfa) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPSUBD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xfa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSUBD") + } + return p +} + +// VPSUBQ performs "Subtract Packed Quadword Integers". 
+// +// Mnemonic : VPSUBQ +// Supported forms : (10 forms) +// +// * VPSUBQ xmm, xmm, xmm [AVX] +// * VPSUBQ m128, xmm, xmm [AVX] +// * VPSUBQ ymm, ymm, ymm [AVX2] +// * VPSUBQ m256, ymm, ymm [AVX2] +// * VPSUBQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPSUBQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPSUBQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSUBQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSUBQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSUBQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSUBQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSUBQ", 3, Operands { v0, v1, v2 }) + // VPSUBQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfb) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfb) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xfb) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSUBQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xfb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xfb) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSUBQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xfb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && 
isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xfb) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPSUBQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xfb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSUBQ") + } + return p +} + +// VPSUBSB performs "Subtract Packed Signed Byte Integers with Signed Saturation". +// +// Mnemonic : VPSUBSB +// Supported forms : (10 forms) +// +// * VPSUBSB xmm, xmm, xmm [AVX] +// * VPSUBSB m128, xmm, xmm [AVX] +// * VPSUBSB ymm, ymm, ymm [AVX2] +// * VPSUBSB m256, ymm, ymm [AVX2] +// * VPSUBSB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBSB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSUBSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSUBSB", 3, Operands { v0, v1, v2 }) + // VPSUBSB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBSB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBSB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe8) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + 
} + // VPSUBSB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe8) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSUBSB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe8) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSUBSB") + } + return p +} + +// VPSUBSW performs "Subtract Packed Signed Word Integers with Signed Saturation". 
+// +// Mnemonic : VPSUBSW +// Supported forms : (10 forms) +// +// * VPSUBSW xmm, xmm, xmm [AVX] +// * VPSUBSW m128, xmm, xmm [AVX] +// * VPSUBSW ymm, ymm, ymm [AVX2] +// * VPSUBSW m256, ymm, ymm [AVX2] +// * VPSUBSW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBSW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSUBSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSUBSW", 3, Operands { v0, v1, v2 }) + // VPSUBSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBSW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe9) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSUBSW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe9) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSUBSW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL 
| ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe9) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSUBSW") + } + return p +} + +// VPSUBUSB performs "Subtract Packed Unsigned Byte Integers with Unsigned Saturation". +// +// Mnemonic : VPSUBUSB +// Supported forms : (10 forms) +// +// * VPSUBUSB xmm, xmm, xmm [AVX] +// * VPSUBUSB m128, xmm, xmm [AVX] +// * VPSUBUSB ymm, ymm, ymm [AVX2] +// * VPSUBUSB m256, ymm, ymm [AVX2] +// * VPSUBUSB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBUSB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBUSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBUSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBUSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBUSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSUBUSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSUBUSB", 3, Operands { v0, v1, v2 }) + // VPSUBUSB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBUSB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBUSB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xd8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd8) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSUBUSB xmm, xmm, xmm{k}{z} + if 
isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xd8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd8) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSUBUSB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xd8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd8) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSUBUSB") + } + return p +} + +// VPSUBUSW performs "Subtract Packed Unsigned Word Integers with Unsigned Saturation". 
+// +// Mnemonic : VPSUBUSW +// Supported forms : (10 forms) +// +// * VPSUBUSW xmm, xmm, xmm [AVX] +// * VPSUBUSW m128, xmm, xmm [AVX] +// * VPSUBUSW ymm, ymm, ymm [AVX2] +// * VPSUBUSW m256, ymm, ymm [AVX2] +// * VPSUBUSW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBUSW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBUSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBUSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBUSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBUSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSUBUSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSUBUSW", 3, Operands { v0, v1, v2 }) + // VPSUBUSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBUSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBUSW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xd9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd9) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSUBUSW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xd9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd9) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSUBUSW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + 
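+ // EVEX.256 register form: the prefix and payload bytes are emitted by hand below rather than via the m.evex helper.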
self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xd9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd9) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSUBUSW") + } + return p +} + +// VPSUBW performs "Subtract Packed Word Integers". +// +// Mnemonic : VPSUBW +// Supported forms : (10 forms) +// +// * VPSUBW xmm, xmm, xmm [AVX] +// * VPSUBW m128, xmm, xmm [AVX] +// * VPSUBW ymm, ymm, ymm [AVX2] +// * VPSUBW m256, ymm, ymm [AVX2] +// * VPSUBW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSUBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSUBW", 3, Operands { v0, v1, v2 }) + // VPSUBW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xf9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf9) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSUBW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + 
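+ // EVEX.128 register form, gated on AVX512VL|AVX512BW like the wider variants above.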
self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xf9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf9) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSUBW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xf9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf9) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSUBW") + } + return p +} + +// VPTERNLOGD performs "Bitwise Ternary Logical Operation on Doubleword Values". +// +// Mnemonic : VPTERNLOGD +// Supported forms : (6 forms) +// +// * VPTERNLOGD imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPTERNLOGD imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VPTERNLOGD imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPTERNLOGD imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPTERNLOGD imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPTERNLOGD imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPTERNLOGD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPTERNLOGD", 4, Operands { v0, v1, v2, v3 }) + // VPTERNLOGD imm8, m512/m32bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x25) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGD imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x25) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGD imm8, m128/m32bcst, xmm, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + 
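+ // Memory form: bcode(v[1]) sets the EVEX broadcast bit when the operand is the m32bcst variant.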
m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x25) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGD imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x25) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGD imm8, m256/m32bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x25) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGD imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x25) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPTERNLOGD") + } + return p +} + +// VPTERNLOGQ performs "Bitwise Ternary Logical Operation on Quadword Values". 
+// +// Mnemonic : VPTERNLOGQ +// Supported forms : (6 forms) +// +// * VPTERNLOGQ imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPTERNLOGQ imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VPTERNLOGQ imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPTERNLOGQ imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPTERNLOGQ imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPTERNLOGQ imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPTERNLOGQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPTERNLOGQ", 4, Operands { v0, v1, v2, v3 }) + // VPTERNLOGQ imm8, m512/m64bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x25) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGQ imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x25) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGQ imm8, m128/m64bcst, xmm, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x25) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGQ imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x25) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGQ imm8, m256/m64bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x25) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGQ imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x25) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPTERNLOGQ") + } + return p +} + 
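+// Example (illustrative sketch, not generated output): the ternary-logic
+// encoders above are driven through the Program API. The constructor,
+// register, and assembler names below are assumptions about the surrounding
+// iasm package, not guarantees made by this file:
+//
+//     p := DefaultArch.CreateProgram()       // assumed program constructor
+//     p.VPTERNLOGD(0xca, ZMM1, ZMM2, ZMM0)   // 0xca truth table: bitwise select among the three sources
+//     code := p.Assemble(0)                  // assumed assembly entry point
+//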
+// VPTEST performs "Packed Logical Compare". +// +// Mnemonic : VPTEST +// Supported forms : (4 forms) +// +// * VPTEST xmm, xmm [AVX] +// * VPTEST m128, xmm [AVX] +// * VPTEST ymm, ymm [AVX] +// * VPTEST m256, ymm [AVX] +// +func (self *Program) VPTEST(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPTEST", 2, Operands { v0, v1 }) + // VPTEST xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x17) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPTEST m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x17) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPTEST ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x17) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPTEST m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x17) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPTEST") + } + return p +} + +// VPTESTMB performs "Logical AND of Packed Byte Integer Values and Set Mask". +// +// Mnemonic : VPTESTMB +// Supported forms : (6 forms) +// +// * VPTESTMB zmm, zmm, k{k} [AVX512BW] +// * VPTESTMB m512, zmm, k{k} [AVX512BW] +// * VPTESTMB xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPTESTMB m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPTESTMB ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPTESTMB m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPTESTMB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPTESTMB", 3, Operands { v0, v1, v2 }) + // VPTESTMB zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMB m512, zmm, k{k} + if isM512(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPTESTMB xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMB m128, xmm, k{k} + if isM128(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v 
[]interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPTESTMB ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMB m256, ymm, k{k} + if isM256(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPTESTMB") + } + return p +} + +// VPTESTMD performs "Logical AND of Packed Doubleword Integer Values and Set Mask". +// +// Mnemonic : VPTESTMD +// Supported forms : (6 forms) +// +// * VPTESTMD m512/m32bcst, zmm, k{k} [AVX512F] +// * VPTESTMD zmm, zmm, k{k} [AVX512F] +// * VPTESTMD m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPTESTMD xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPTESTMD m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPTESTMD ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPTESTMD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPTESTMD", 3, Operands { v0, v1, v2 }) + // VPTESTMD m512/m32bcst, zmm, k{k} + if isM512M32bcst(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPTESTMD zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMD m128/m32bcst, xmm, k{k} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPTESTMD xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMD m256/m32bcst, ymm, k{k} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) 
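+ // 0x27 is the VPTESTMD opcode byte; the disp8 below is scaled by the 32-byte tuple size of the 256-bit form.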
+ m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPTESTMD ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPTESTMD") + } + return p +} + +// VPTESTMQ performs "Logical AND of Packed Quadword Integer Values and Set Mask". +// +// Mnemonic : VPTESTMQ +// Supported forms : (6 forms) +// +// * VPTESTMQ m512/m64bcst, zmm, k{k} [AVX512F] +// * VPTESTMQ zmm, zmm, k{k} [AVX512F] +// * VPTESTMQ m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPTESTMQ xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPTESTMQ m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPTESTMQ ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPTESTMQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPTESTMQ", 3, Operands { v0, v1, v2 }) + // VPTESTMQ m512/m64bcst, zmm, k{k} + if isM512M64bcst(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPTESTMQ zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMQ m128/m64bcst, xmm, k{k} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPTESTMQ xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMQ m256/m64bcst, ymm, k{k} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPTESTMQ ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 
0x20) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPTESTMQ") + } + return p +} + +// VPTESTMW performs "Logical AND of Packed Word Integer Values and Set Mask". +// +// Mnemonic : VPTESTMW +// Supported forms : (6 forms) +// +// * VPTESTMW zmm, zmm, k{k} [AVX512BW] +// * VPTESTMW m512, zmm, k{k} [AVX512BW] +// * VPTESTMW xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPTESTMW m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPTESTMW ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPTESTMW m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPTESTMW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPTESTMW", 3, Operands { v0, v1, v2 }) + // VPTESTMW zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMW m512, zmm, k{k} + if isM512(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPTESTMW xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMW m128, xmm, k{k} + if isM128(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPTESTMW ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMW m256, ymm, k{k} + if isM256(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPTESTMW") + } + return p +} + +// VPTESTNMB performs "Logical NAND of Packed Byte Integer Values and Set Mask". 
+// +// Mnemonic : VPTESTNMB +// Supported forms : (6 forms) +// +// * VPTESTNMB zmm, zmm, k{k} [AVX512BW,AVX512F] +// * VPTESTNMB m512, zmm, k{k} [AVX512BW,AVX512F] +// * VPTESTNMB xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPTESTNMB m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPTESTNMB ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPTESTNMB m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPTESTNMB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPTESTNMB", 3, Operands { v0, v1, v2 }) + // VPTESTNMB zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMB m512, zmm, k{k} + if isM512(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPTESTNMB xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMB m128, xmm, k{k} + if isM128(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPTESTNMB ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMB m256, ymm, k{k} + if isM256(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPTESTNMB") + } + return p +} + +// VPTESTNMD performs "Logical NAND of Packed Doubleword Integer Values and Set Mask". 
+// +// Mnemonic : VPTESTNMD +// Supported forms : (6 forms) +// +// * VPTESTNMD m512/m32bcst, zmm, k{k} [AVX512F] +// * VPTESTNMD zmm, zmm, k{k} [AVX512F] +// * VPTESTNMD m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPTESTNMD xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPTESTNMD m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPTESTNMD ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPTESTNMD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPTESTNMD", 3, Operands { v0, v1, v2 }) + // VPTESTNMD m512/m32bcst, zmm, k{k} + if isM512M32bcst(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPTESTNMD zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMD m128/m32bcst, xmm, k{k} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPTESTNMD xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMD m256/m32bcst, ymm, k{k} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPTESTNMD ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPTESTNMD") + } + return p +} + +// VPTESTNMQ performs "Logical NAND of Packed Quadword Integer Values and Set Mask". 
+// +// Mnemonic : VPTESTNMQ +// Supported forms : (6 forms) +// +// * VPTESTNMQ m512/m64bcst, zmm, k{k} [AVX512F] +// * VPTESTNMQ zmm, zmm, k{k} [AVX512F] +// * VPTESTNMQ m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPTESTNMQ xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPTESTNMQ m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPTESTNMQ ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPTESTNMQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPTESTNMQ", 3, Operands { v0, v1, v2 }) + // VPTESTNMQ m512/m64bcst, zmm, k{k} + if isM512M64bcst(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x86, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPTESTNMQ zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMQ m128/m64bcst, xmm, k{k} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x86, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPTESTNMQ xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMQ m256/m64bcst, ymm, k{k} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x86, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPTESTNMQ ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPTESTNMQ") + } + return p +} + +// VPTESTNMW performs "Logical NAND of Packed Word Integer Values and Set Mask". 
+// +// Mnemonic : VPTESTNMW +// Supported forms : (6 forms) +// +// * VPTESTNMW zmm, zmm, k{k} [AVX512BW,AVX512F] +// * VPTESTNMW m512, zmm, k{k} [AVX512BW,AVX512F] +// * VPTESTNMW xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPTESTNMW m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPTESTNMW ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPTESTNMW m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPTESTNMW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPTESTNMW", 3, Operands { v0, v1, v2 }) + // VPTESTNMW zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMW m512, zmm, k{k} + if isM512(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x86, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPTESTNMW xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMW m128, xmm, k{k} + if isM128(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x86, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPTESTNMW ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMW m256, ymm, k{k} + if isM256(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x86, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPTESTNMW") + } + return p +} + +// VPUNPCKHBW performs "Unpack and Interleave High-Order Bytes into Words". 
+// +// Mnemonic : VPUNPCKHBW +// Supported forms : (10 forms) +// +// * VPUNPCKHBW xmm, xmm, xmm [AVX] +// * VPUNPCKHBW m128, xmm, xmm [AVX] +// * VPUNPCKHBW ymm, ymm, ymm [AVX2] +// * VPUNPCKHBW m256, ymm, ymm [AVX2] +// * VPUNPCKHBW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPUNPCKHBW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPUNPCKHBW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKHBW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKHBW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKHBW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPUNPCKHBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPUNPCKHBW", 3, Operands { v0, v1, v2 }) + // VPUNPCKHBW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x68) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHBW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x68) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKHBW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x68) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHBW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x68) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKHBW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x68) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHBW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x68) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPUNPCKHBW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x68) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHBW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x68) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPUNPCKHBW ymm, ymm, ymm{k}{z} + if 
isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x68) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHBW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x68) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPUNPCKHBW") + } + return p +} + +// VPUNPCKHDQ performs "Unpack and Interleave High-Order Doublewords into Quadwords". +// +// Mnemonic : VPUNPCKHDQ +// Supported forms : (10 forms) +// +// * VPUNPCKHDQ xmm, xmm, xmm [AVX] +// * VPUNPCKHDQ m128, xmm, xmm [AVX] +// * VPUNPCKHDQ ymm, ymm, ymm [AVX2] +// * VPUNPCKHDQ m256, ymm, ymm [AVX2] +// * VPUNPCKHDQ m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPUNPCKHDQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPUNPCKHDQ m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKHDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKHDQ m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKHDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPUNPCKHDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPUNPCKHDQ", 3, Operands { v0, v1, v2 }) + // VPUNPCKHDQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHDQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x6a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKHDQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHDQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x6a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKHDQ m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6a) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPUNPCKHDQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | 
(0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHDQ m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6a) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPUNPCKHDQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHDQ m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6a) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPUNPCKHDQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPUNPCKHDQ") + } + return p +} + +// VPUNPCKHQDQ performs "Unpack and Interleave High-Order Quadwords into Double Quadwords". 
+// +// Mnemonic : VPUNPCKHQDQ +// Supported forms : (10 forms) +// +// * VPUNPCKHQDQ xmm, xmm, xmm [AVX] +// * VPUNPCKHQDQ m128, xmm, xmm [AVX] +// * VPUNPCKHQDQ ymm, ymm, ymm [AVX2] +// * VPUNPCKHQDQ m256, ymm, ymm [AVX2] +// * VPUNPCKHQDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPUNPCKHQDQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPUNPCKHQDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKHQDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKHQDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKHQDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPUNPCKHQDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPUNPCKHQDQ", 3, Operands { v0, v1, v2 }) + // VPUNPCKHQDQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHQDQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x6d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKHQDQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHQDQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x6d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKHQDQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPUNPCKHQDQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHQDQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPUNPCKHQDQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x6d) + m.emit(0xc0 | 
lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHQDQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPUNPCKHQDQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPUNPCKHQDQ") + } + return p +} + +// VPUNPCKHWD performs "Unpack and Interleave High-Order Words into Doublewords". +// +// Mnemonic : VPUNPCKHWD +// Supported forms : (10 forms) +// +// * VPUNPCKHWD xmm, xmm, xmm [AVX] +// * VPUNPCKHWD m128, xmm, xmm [AVX] +// * VPUNPCKHWD ymm, ymm, ymm [AVX2] +// * VPUNPCKHWD m256, ymm, ymm [AVX2] +// * VPUNPCKHWD zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPUNPCKHWD m512, zmm, zmm{k}{z} [AVX512BW] +// * VPUNPCKHWD xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKHWD m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKHWD ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKHWD m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPUNPCKHWD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPUNPCKHWD", 3, Operands { v0, v1, v2 }) + // VPUNPCKHWD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x69) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHWD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x69) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKHWD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x69) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHWD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x69) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKHWD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x69) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHWD m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m 
*_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x69) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPUNPCKHWD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x69) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHWD m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x69) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPUNPCKHWD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x69) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHWD m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x69) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPUNPCKHWD") + } + return p +} + +// VPUNPCKLBW performs "Unpack and Interleave Low-Order Bytes into Words". 
+// +// Mnemonic : VPUNPCKLBW +// Supported forms : (10 forms) +// +// * VPUNPCKLBW xmm, xmm, xmm [AVX] +// * VPUNPCKLBW m128, xmm, xmm [AVX] +// * VPUNPCKLBW ymm, ymm, ymm [AVX2] +// * VPUNPCKLBW m256, ymm, ymm [AVX2] +// * VPUNPCKLBW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPUNPCKLBW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPUNPCKLBW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKLBW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKLBW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKLBW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPUNPCKLBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPUNPCKLBW", 3, Operands { v0, v1, v2 }) + // VPUNPCKLBW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x60) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLBW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x60) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKLBW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x60) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLBW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x60) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKLBW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x60) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLBW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x60) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPUNPCKLBW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x60) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLBW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x60) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPUNPCKLBW ymm, ymm, ymm{k}{z} + if 
isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x60) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLBW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x60) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPUNPCKLBW") + } + return p +} + +// VPUNPCKLDQ performs "Unpack and Interleave Low-Order Doublewords into Quadwords". +// +// Mnemonic : VPUNPCKLDQ +// Supported forms : (10 forms) +// +// * VPUNPCKLDQ xmm, xmm, xmm [AVX] +// * VPUNPCKLDQ m128, xmm, xmm [AVX] +// * VPUNPCKLDQ ymm, ymm, ymm [AVX2] +// * VPUNPCKLDQ m256, ymm, ymm [AVX2] +// * VPUNPCKLDQ m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPUNPCKLDQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPUNPCKLDQ m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKLDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKLDQ m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKLDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPUNPCKLDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPUNPCKLDQ", 3, Operands { v0, v1, v2 }) + // VPUNPCKLDQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x62) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLDQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x62) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKLDQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x62) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLDQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x62) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKLDQ m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x62) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPUNPCKLDQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 
^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x62) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLDQ m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x62) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPUNPCKLDQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x62) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLDQ m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x62) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPUNPCKLDQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x62) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPUNPCKLDQ") + } + return p +} + +// VPUNPCKLQDQ performs "Unpack and Interleave Low-Order Quadwords into Double Quadwords". 
+// +// Mnemonic : VPUNPCKLQDQ +// Supported forms : (10 forms) +// +// * VPUNPCKLQDQ xmm, xmm, xmm [AVX] +// * VPUNPCKLQDQ m128, xmm, xmm [AVX] +// * VPUNPCKLQDQ ymm, ymm, ymm [AVX2] +// * VPUNPCKLQDQ m256, ymm, ymm [AVX2] +// * VPUNPCKLQDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPUNPCKLQDQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPUNPCKLQDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKLQDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKLQDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKLQDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPUNPCKLQDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPUNPCKLQDQ", 3, Operands { v0, v1, v2 }) + // VPUNPCKLQDQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLQDQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x6c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKLQDQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLQDQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x6c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKLQDQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPUNPCKLQDQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLQDQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6c) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPUNPCKLQDQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x6c) + m.emit(0xc0 | 
lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLQDQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPUNPCKLQDQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPUNPCKLQDQ") + } + return p +} + +// VPUNPCKLWD performs "Unpack and Interleave Low-Order Words into Doublewords". +// +// Mnemonic : VPUNPCKLWD +// Supported forms : (10 forms) +// +// * VPUNPCKLWD xmm, xmm, xmm [AVX] +// * VPUNPCKLWD m128, xmm, xmm [AVX] +// * VPUNPCKLWD ymm, ymm, ymm [AVX2] +// * VPUNPCKLWD m256, ymm, ymm [AVX2] +// * VPUNPCKLWD zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPUNPCKLWD m512, zmm, zmm{k}{z} [AVX512BW] +// * VPUNPCKLWD xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKLWD m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKLWD ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKLWD m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPUNPCKLWD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPUNPCKLWD", 3, Operands { v0, v1, v2 }) + // VPUNPCKLWD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x61) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLWD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x61) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKLWD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x61) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLWD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x61) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKLWD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x61) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLWD m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m 
*_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x61) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPUNPCKLWD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x61) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLWD m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x61) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPUNPCKLWD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x61) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLWD m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x61) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPUNPCKLWD") + } + return p +} + +// VPXOR performs "Packed Bitwise Logical Exclusive OR". 
+// +// Mnemonic : VPXOR +// Supported forms : (4 forms) +// +// * VPXOR xmm, xmm, xmm [AVX] +// * VPXOR m128, xmm, xmm [AVX] +// * VPXOR ymm, ymm, ymm [AVX2] +// * VPXOR m256, ymm, ymm [AVX2] +// +func (self *Program) VPXOR(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPXOR", 3, Operands { v0, v1, v2 }) + // VPXOR xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xef) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPXOR m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xef) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPXOR ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xef) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPXOR m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xef) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPXOR") + } + return p +} + +// VPXORD performs "Bitwise Logical Exclusive OR of Packed Doubleword Integers". +// +// Mnemonic : VPXORD +// Supported forms : (6 forms) +// +// * VPXORD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPXORD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPXORD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPXORD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPXORD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPXORD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPXORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPXORD", 3, Operands { v0, v1, v2 }) + // VPXORD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xef) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPXORD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xef) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPXORD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xef) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPXORD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v 
[]interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xef) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPXORD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xef) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPXORD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xef) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPXORD") + } + return p +} + +// VPXORQ performs "Bitwise Logical Exclusive OR of Packed Quadword Integers". +// +// Mnemonic : VPXORQ +// Supported forms : (6 forms) +// +// * VPXORQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPXORQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPXORQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPXORQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPXORQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPXORQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPXORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPXORQ", 3, Operands { v0, v1, v2 }) + // VPXORQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xef) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPXORQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xef) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPXORQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xef) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPXORQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xef) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // 
VPXORQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xef) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPXORQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xef) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPXORQ") + } + return p +} + +// VRANGEPD performs "Range Restriction Calculation For Packed Pairs of Double-Precision Floating-Point Values". +// +// Mnemonic : VRANGEPD +// Supported forms : (7 forms) +// +// * VRANGEPD imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VRANGEPD imm8, {sae}, zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VRANGEPD imm8, zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VRANGEPD imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VRANGEPD imm8, xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VRANGEPD imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VRANGEPD imm8, ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VRANGEPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRANGEPD", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VRANGEPD", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VRANGEPD takes 4 or 5 operands") + } + // VRANGEPD imm8, m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x50) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VRANGEPD imm8, {sae}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0xfd ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x50) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VRANGEPD imm8, zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x50) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VRANGEPD imm8, m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && 
isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x50) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VRANGEPD imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x50) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VRANGEPD imm8, m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x50) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VRANGEPD imm8, ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x50) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRANGEPD") + } + return p +} + +// VRANGEPS performs "Range Restriction Calculation For Packed Pairs of Single-Precision Floating-Point Values". 
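+//
+// A sketch of the variadic operand list, assuming a *Program value prog,
+// ZMM register operands, an {sae} marker value named SAE, and that plain
+// integer literals satisfy isImm8 (all assumptions, defined elsewhere in
+// the package if at all):
+//
+//     prog.VRANGEPS(0x01, ZMM1, ZMM2, ZMM3)        // 4 operands, no {sae}
+//     prog.VRANGEPS(0x01, SAE, ZMM1, ZMM2, ZMM3)   // 5 operands, {sae} form
+//
+// Any other operand count panics up front ("instruction VRANGEPS takes 4
+// or 5 operands"); otherwise the form is selected by the operand kinds.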
+// +// Mnemonic : VRANGEPS +// Supported forms : (7 forms) +// +// * VRANGEPS imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VRANGEPS imm8, {sae}, zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VRANGEPS imm8, zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VRANGEPS imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VRANGEPS imm8, xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VRANGEPS imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VRANGEPS imm8, ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VRANGEPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRANGEPS", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VRANGEPS", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VRANGEPS takes 4 or 5 operands") + } + // VRANGEPS imm8, m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x50) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VRANGEPS imm8, {sae}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0x7d ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x50) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VRANGEPS imm8, zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x50) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VRANGEPS imm8, m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x50) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VRANGEPS imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x50) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VRANGEPS imm8, m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | 
ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x50) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VRANGEPS imm8, ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x50) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRANGEPS") + } + return p +} + +// VRANGESD performs "Range Restriction Calculation For a pair of Scalar Double-Precision Floating-Point Values". +// +// Mnemonic : VRANGESD +// Supported forms : (3 forms) +// +// * VRANGESD imm8, m64, xmm, xmm{k}{z} [AVX512DQ] +// * VRANGESD imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512DQ] +// * VRANGESD imm8, xmm, xmm, xmm{k}{z} [AVX512DQ] +// +func (self *Program) VRANGESD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRANGESD", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VRANGESD", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VRANGESD takes 4 or 5 operands") + } + // VRANGESD imm8, m64, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x51) + m.mrsd(lcode(v[3]), addr(v[1]), 8) + m.imm1(toImmAny(v[0])) + }) + } + // VRANGESD imm8, {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0xfd ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x51) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VRANGESD imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x51) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRANGESD") + } + return p +} + +// VRANGESS performs "Range Restriction Calculation For a pair of Scalar Single-Precision Floating-Point Values". 
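+//
+// Per the Intel definition of VRANGE, imm8 bits [1:0] select the operation
+// (00 min, 01 max, 10 absolute min, 11 absolute max) and bits [3:2] control
+// the sign of the result. A sketch with assumed operand names:
+//
+//     prog.VRANGESS(0b0001, XMM1, XMM2, XMM3)   // max, sign taken from src1
+//
+// Only the low single-precision lane is computed; the rest of the
+// destination follows the usual scalar-EVEX pass-through rules.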
+// +// Mnemonic : VRANGESS +// Supported forms : (3 forms) +// +// * VRANGESS imm8, m32, xmm, xmm{k}{z} [AVX512DQ] +// * VRANGESS imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512DQ] +// * VRANGESS imm8, xmm, xmm, xmm{k}{z} [AVX512DQ] +// +func (self *Program) VRANGESS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRANGESS", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VRANGESS", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VRANGESS takes 4 or 5 operands") + } + // VRANGESS imm8, m32, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x51) + m.mrsd(lcode(v[3]), addr(v[1]), 4) + m.imm1(toImmAny(v[0])) + }) + } + // VRANGESS imm8, {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0x7d ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x51) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VRANGESS imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x51) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRANGESS") + } + return p +} + +// VRCP14PD performs "Compute Approximate Reciprocals of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VRCP14PD +// Supported forms : (6 forms) +// +// * VRCP14PD m512/m64bcst, zmm{k}{z} [AVX512F] +// * VRCP14PD zmm, zmm{k}{z} [AVX512F] +// * VRCP14PD m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VRCP14PD m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VRCP14PD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VRCP14PD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VRCP14PD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VRCP14PD", 2, Operands { v0, v1 }) + // VRCP14PD m512/m64bcst, zmm{k}{z} + if isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VRCP14PD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRCP14PD m128/m64bcst, xmm{k}{z} + if isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VRCP14PD m256/m64bcst, ymm{k}{z} + if isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VRCP14PD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRCP14PD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRCP14PD") + } + return p +} + +// VRCP14PS performs "Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values". 
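+//
+// The approximation carries a maximum relative error of 2^-14, hence the
+// "14" in the mnemonic. A sketch, with register operand names assumed; the
+// destination may additionally carry {k}{z} masking, whose operand
+// constructor lives elsewhere in the package:
+//
+//     prog.VRCP14PS(ZMM1, ZMM2)   // zmm2[i] ~= 1/zmm1[i] per float32 lane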
+// +// Mnemonic : VRCP14PS +// Supported forms : (6 forms) +// +// * VRCP14PS m512/m32bcst, zmm{k}{z} [AVX512F] +// * VRCP14PS zmm, zmm{k}{z} [AVX512F] +// * VRCP14PS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VRCP14PS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VRCP14PS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VRCP14PS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VRCP14PS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VRCP14PS", 2, Operands { v0, v1 }) + // VRCP14PS m512/m32bcst, zmm{k}{z} + if isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VRCP14PS zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRCP14PS m128/m32bcst, xmm{k}{z} + if isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VRCP14PS m256/m32bcst, ymm{k}{z} + if isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VRCP14PS xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRCP14PS ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRCP14PS") + } + return p +} + +// VRCP14SD performs "Compute Approximate Reciprocal of a Scalar Double-Precision Floating-Point Value". 
+// +// Mnemonic : VRCP14SD +// Supported forms : (2 forms) +// +// * VRCP14SD xmm, xmm, xmm{k}{z} [AVX512F] +// * VRCP14SD m64, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VRCP14SD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VRCP14SD", 3, Operands { v0, v1, v2 }) + // VRCP14SD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x4d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VRCP14SD m64, xmm, xmm{k}{z} + if isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x4d) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VRCP14SD") + } + return p +} + +// VRCP14SS performs "Compute Approximate Reciprocal of a Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : VRCP14SS +// Supported forms : (2 forms) +// +// * VRCP14SS xmm, xmm, xmm{k}{z} [AVX512F] +// * VRCP14SS m32, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VRCP14SS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VRCP14SS", 3, Operands { v0, v1, v2 }) + // VRCP14SS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x4d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VRCP14SS m32, xmm, xmm{k}{z} + if isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x4d) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VRCP14SS") + } + return p +} + +// VRCP28PD performs "Approximation to the Reciprocal of Packed Double-Precision Floating-Point Values with Less Than 2^-28 Relative Error". 
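+//
+// The variadic tail allows an optional leading {sae} operand. A sketch,
+// assuming an SAE marker value and ZMM operand names defined elsewhere:
+//
+//     prog.VRCP28PD(ZMM1, ZMM2)        // 2 operands
+//     prog.VRCP28PD(SAE, ZMM1, ZMM2)   // 3 operands, suppress-all-exceptions
+//
+// Note the tighter 2^-28 error bound versus 2^-14 for VRCP14PD, and the
+// AVX512ER ISA requirement.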
+// +// Mnemonic : VRCP28PD +// Supported forms : (3 forms) +// +// * VRCP28PD m512/m64bcst, zmm{k}{z} [AVX512ER] +// * VRCP28PD {sae}, zmm, zmm{k}{z} [AVX512ER] +// * VRCP28PD zmm, zmm{k}{z} [AVX512ER] +// +func (self *Program) VRCP28PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRCP28PD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VRCP28PD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VRCP28PD takes 2 or 3 operands") + } + // VRCP28PD m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xca) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VRCP28PD {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0xca) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VRCP28PD zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xca) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRCP28PD") + } + return p +} + +// VRCP28PS performs "Approximation to the Reciprocal of Packed Single-Precision Floating-Point Values with Less Than 2^-28 Relative Error". 
+// +// Mnemonic : VRCP28PS +// Supported forms : (3 forms) +// +// * VRCP28PS m512/m32bcst, zmm{k}{z} [AVX512ER] +// * VRCP28PS {sae}, zmm, zmm{k}{z} [AVX512ER] +// * VRCP28PS zmm, zmm{k}{z} [AVX512ER] +// +func (self *Program) VRCP28PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRCP28PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VRCP28PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VRCP28PS takes 2 or 3 operands") + } + // VRCP28PS m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xca) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VRCP28PS {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0xca) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VRCP28PS zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xca) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRCP28PS") + } + return p +} + +// VRCP28SD performs "Approximation to the Reciprocal of a Scalar Double-Precision Floating-Point Value with Less Than 2^-28 Relative Error". 
+// +// Mnemonic : VRCP28SD +// Supported forms : (3 forms) +// +// * VRCP28SD m64, xmm, xmm{k}{z} [AVX512ER] +// * VRCP28SD {sae}, xmm, xmm, xmm{k}{z} [AVX512ER] +// * VRCP28SD xmm, xmm, xmm{k}{z} [AVX512ER] +// +func (self *Program) VRCP28SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRCP28SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VRCP28SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VRCP28SD takes 3 or 4 operands") + } + // VRCP28SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xcb) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VRCP28SD {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xcb) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VRCP28SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xcb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRCP28SD") + } + return p +} + +// VRCP28SS performs "Approximation to the Reciprocal of a Scalar Single-Precision Floating-Point Value with Less Than 2^-28 Relative Error". 
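+//
+// Scalar variant: only the low single-precision lane is approximated. A
+// sketch with assumed XMM operand names and SAE marker value:
+//
+//     prog.VRCP28SS(XMM1, XMM2, XMM3)        // xmm3[31:0] ~= 1/xmm1[31:0]
+//     prog.VRCP28SS(SAE, XMM1, XMM2, XMM3)   // same, with {sae}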
+// +// Mnemonic : VRCP28SS +// Supported forms : (3 forms) +// +// * VRCP28SS m32, xmm, xmm{k}{z} [AVX512ER] +// * VRCP28SS {sae}, xmm, xmm, xmm{k}{z} [AVX512ER] +// * VRCP28SS xmm, xmm, xmm{k}{z} [AVX512ER] +// +func (self *Program) VRCP28SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRCP28SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VRCP28SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VRCP28SS takes 3 or 4 operands") + } + // VRCP28SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xcb) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VRCP28SS {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xcb) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VRCP28SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xcb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRCP28SS") + } + return p +} + +// VRCPPS performs "Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values". 
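+//
+// Unlike the EVEX-encoded VRCP14PS/VRCP28PS variants above, this is the
+// plain AVX encoding: maximum relative error 1.5 * 2^-12 and no masking
+// support. A sketch with assumed register operand names:
+//
+//     prog.VRCPPS(XMM0, XMM1)   // xmm1[i] ~= 1/xmm0[i]
+//     prog.VRCPPS(YMM0, YMM1)   // 256-bit variant, still only ISA_AVX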
+// +// Mnemonic : VRCPPS +// Supported forms : (4 forms) +// +// * VRCPPS xmm, xmm [AVX] +// * VRCPPS m128, xmm [AVX] +// * VRCPPS ymm, ymm [AVX] +// * VRCPPS m256, ymm [AVX] +// +func (self *Program) VRCPPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VRCPPS", 2, Operands { v0, v1 }) + // VRCPPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x53) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRCPPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), addr(v[0]), 0) + m.emit(0x53) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VRCPPS ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), v[0], 0) + m.emit(0x53) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRCPPS m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), addr(v[0]), 0) + m.emit(0x53) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VRCPPS") + } + return p +} + +// VRCPSS performs "Compute Approximate Reciprocal of Scalar Single-Precision Floating-Point Values". +// +// Mnemonic : VRCPSS +// Supported forms : (2 forms) +// +// * VRCPSS xmm, xmm, xmm [AVX] +// * VRCPSS m32, xmm, xmm [AVX] +// +func (self *Program) VRCPSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VRCPSS", 3, Operands { v0, v1, v2 }) + // VRCPSS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x53) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VRCPSS m32, xmm, xmm + if isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x53) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VRCPSS") + } + return p +} + +// VREDUCEPD performs "Perform Reduction Transformation on Packed Double-Precision Floating-Point Values". 
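+//
+// Per the Intel definition of VREDUCE, imm8 bits [7:4] give the number of
+// fraction bits M to keep and the low nibble holds rounding-control flags.
+// The m512/m64bcst forms accept either a full memory operand or a
+// broadcast 64-bit element; bcode(v[1]) above sets the EVEX broadcast bit.
+// A register-form sketch with assumed operand names:
+//
+//     prog.VREDUCEPD((4 << 4) | 0, ZMM1, ZMM2)   // keep 4 fraction bits, round-to-nearest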
+// +// Mnemonic : VREDUCEPD +// Supported forms : (6 forms) +// +// * VREDUCEPD imm8, m512/m64bcst, zmm{k}{z} [AVX512DQ] +// * VREDUCEPD imm8, zmm, zmm{k}{z} [AVX512DQ] +// * VREDUCEPD imm8, m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VREDUCEPD imm8, m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VREDUCEPD imm8, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VREDUCEPD imm8, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VREDUCEPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VREDUCEPD", 3, Operands { v0, v1, v2 }) + // VREDUCEPD imm8, m512/m64bcst, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VREDUCEPD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VREDUCEPD imm8, m128/m64bcst, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VREDUCEPD imm8, m256/m64bcst, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VREDUCEPD imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VREDUCEPD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VREDUCEPD") + } + return p +} + +// VREDUCEPS performs "Perform Reduction Transformation on Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VREDUCEPS +// Supported forms : (6 forms) +// +// * VREDUCEPS imm8, m512/m32bcst, zmm{k}{z} [AVX512DQ] +// * VREDUCEPS imm8, zmm, zmm{k}{z} [AVX512DQ] +// * VREDUCEPS imm8, m128/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VREDUCEPS imm8, m256/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VREDUCEPS imm8, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VREDUCEPS imm8, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VREDUCEPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VREDUCEPS", 3, Operands { v0, v1, v2 }) + // VREDUCEPS imm8, m512/m32bcst, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VREDUCEPS imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VREDUCEPS imm8, m128/m32bcst, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VREDUCEPS imm8, m256/m32bcst, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VREDUCEPS imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VREDUCEPS imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VREDUCEPS") + } + return p +} + +// VREDUCESD performs "Perform Reduction Transformation on a Scalar Double-Precision Floating-Point Value". 
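+//
+// Unlike VRANGESD above, this encoder takes exactly four operands; the
+// supported forms here list no {sae} variant. A sketch with assumed
+// operand names (imm8 layout as described for VREDUCEPD):
+//
+//     prog.VREDUCESD(0x10, XMM1, XMM2, XMM3)   // M=1 fraction bit, low lane only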
+// +// Mnemonic : VREDUCESD +// Supported forms : (2 forms) +// +// * VREDUCESD imm8, xmm, xmm, xmm{k}{z} [AVX512DQ] +// * VREDUCESD imm8, m64, xmm, xmm{k}{z} [AVX512DQ] +// +func (self *Program) VREDUCESD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VREDUCESD", 4, Operands { v0, v1, v2, v3 }) + // VREDUCESD imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x57) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VREDUCESD imm8, m64, xmm, xmm{k}{z} + if isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x57) + m.mrsd(lcode(v[3]), addr(v[1]), 8) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VREDUCESD") + } + return p +} + +// VREDUCESS performs "Perform Reduction Transformation on a Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : VREDUCESS +// Supported forms : (2 forms) +// +// * VREDUCESS imm8, xmm, xmm, xmm{k}{z} [AVX512DQ] +// * VREDUCESS imm8, m32, xmm, xmm{k}{z} [AVX512DQ] +// +func (self *Program) VREDUCESS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VREDUCESS", 4, Operands { v0, v1, v2, v3 }) + // VREDUCESS imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x57) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VREDUCESS imm8, m32, xmm, xmm{k}{z} + if isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x57) + m.mrsd(lcode(v[3]), addr(v[1]), 4) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VREDUCESS") + } + return p +} + +// VRNDSCALEPD performs "Round Packed Double-Precision Floating-Point Values To Include A Given Number Of Fraction Bits". 
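+//
+// imm8 layout: bits [7:4] give the scale M (the value is rounded to a
+// multiple of 2^-M), bits [1:0] the rounding mode (00 nearest-even, 01
+// down, 10 up, 11 truncate), bit 2 defers to MXCSR.RC, and bit 3
+// suppresses precision exceptions. A sketch with assumed operand names:
+//
+//     prog.VRNDSCALEPD(0x01, ZMM1, ZMM2)   // M=0, round down: elementwise floor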
+// +// Mnemonic : VRNDSCALEPD +// Supported forms : (7 forms) +// +// * VRNDSCALEPD imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VRNDSCALEPD imm8, {sae}, zmm, zmm{k}{z} [AVX512F] +// * VRNDSCALEPD imm8, zmm, zmm{k}{z} [AVX512F] +// * VRNDSCALEPD imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VRNDSCALEPD imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VRNDSCALEPD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VRNDSCALEPD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VRNDSCALEPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRNDSCALEPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VRNDSCALEPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VRNDSCALEPD takes 3 or 4 operands") + } + // VRNDSCALEPD imm8, m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x09) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALEPD imm8, {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18) + m.emit(0x09) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALEPD imm8, zmm, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x09) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALEPD imm8, m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x09) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALEPD imm8, m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x09) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALEPD imm8, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x09) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALEPD imm8, ymm, ymm{k}{z} + if len(vv) == 0 
&& isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x09) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRNDSCALEPD") + } + return p +} + +// VRNDSCALEPS performs "Round Packed Single-Precision Floating-Point Values To Include A Given Number Of Fraction Bits". +// +// Mnemonic : VRNDSCALEPS +// Supported forms : (7 forms) +// +// * VRNDSCALEPS imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VRNDSCALEPS imm8, {sae}, zmm, zmm{k}{z} [AVX512F] +// * VRNDSCALEPS imm8, zmm, zmm{k}{z} [AVX512F] +// * VRNDSCALEPS imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VRNDSCALEPS imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VRNDSCALEPS imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VRNDSCALEPS imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VRNDSCALEPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRNDSCALEPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VRNDSCALEPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VRNDSCALEPS takes 3 or 4 operands") + } + // VRNDSCALEPS imm8, m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x08) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALEPS imm8, {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18) + m.emit(0x08) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALEPS imm8, zmm, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x08) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALEPS imm8, m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x08) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALEPS imm8, m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 
bcode(v[1])) + m.emit(0x08) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALEPS imm8, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x08) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALEPS imm8, ymm, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x08) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRNDSCALEPS") + } + return p +} + +// VRNDSCALESD performs "Round Scalar Double-Precision Floating-Point Value To Include A Given Number Of Fraction Bits". +// +// Mnemonic : VRNDSCALESD +// Supported forms : (3 forms) +// +// * VRNDSCALESD imm8, m64, xmm, xmm{k}{z} [AVX512F] +// * VRNDSCALESD imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VRNDSCALESD imm8, xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VRNDSCALESD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRNDSCALESD", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VRNDSCALESD", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VRNDSCALESD takes 4 or 5 operands") + } + // VRNDSCALESD imm8, m64, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x0b) + m.mrsd(lcode(v[3]), addr(v[1]), 8) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALESD imm8, {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0xfd ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALESD imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRNDSCALESD") + } + return p +} + +// VRNDSCALESS performs "Round Scalar Single-Precision Floating-Point Value To 
Include A Given Number Of Fraction Bits". +// +// Mnemonic : VRNDSCALESS +// Supported forms : (3 forms) +// +// * VRNDSCALESS imm8, m32, xmm, xmm{k}{z} [AVX512F] +// * VRNDSCALESS imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VRNDSCALESS imm8, xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VRNDSCALESS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRNDSCALESS", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VRNDSCALESS", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VRNDSCALESS takes 4 or 5 operands") + } + // VRNDSCALESS imm8, m32, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x0a) + m.mrsd(lcode(v[3]), addr(v[1]), 4) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALESS imm8, {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0x7d ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALESS imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRNDSCALESS") + } + return p +} + +// VROUNDPD performs "Round Packed Double Precision Floating-Point Values". 
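+//
+// The imm8 picks the rounding mode: bits 1:0 select it (00 nearest even,
+// 01 down, 10 up, 11 truncate), bit 2 set means "use MXCSR.RC instead",
+// and bit 3 suppresses the precision exception. A minimal sketch, assuming
+// `p` is a *Program from this package and a plain Go int serves as imm8:
+//
+//     p.VROUNDPD(3, XMM0, XMM1)   // truncate each double of XMM0 into XMM1
+//     p.VROUNDPD(1, YMM0, YMM1)   // floor each double of YMM0 into YMM1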
+// +// Mnemonic : VROUNDPD +// Supported forms : (4 forms) +// +// * VROUNDPD imm8, xmm, xmm [AVX] +// * VROUNDPD imm8, m128, xmm [AVX] +// * VROUNDPD imm8, ymm, ymm [AVX] +// * VROUNDPD imm8, m256, ymm [AVX] +// +func (self *Program) VROUNDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VROUNDPD", 3, Operands { v0, v1, v2 }) + // VROUNDPD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0x09) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VROUNDPD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0x09) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VROUNDPD imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d) + m.emit(0x09) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VROUNDPD imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[2]), addr(v[1]), 0) + m.emit(0x09) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VROUNDPD") + } + return p +} + +// VROUNDPS performs "Round Packed Single Precision Floating-Point Values". 
+// +// Mnemonic : VROUNDPS +// Supported forms : (4 forms) +// +// * VROUNDPS imm8, xmm, xmm [AVX] +// * VROUNDPS imm8, m128, xmm [AVX] +// * VROUNDPS imm8, ymm, ymm [AVX] +// * VROUNDPS imm8, m256, ymm [AVX] +// +func (self *Program) VROUNDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VROUNDPS", 3, Operands { v0, v1, v2 }) + // VROUNDPS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0x08) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VROUNDPS imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0x08) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VROUNDPS imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d) + m.emit(0x08) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VROUNDPS imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[2]), addr(v[1]), 0) + m.emit(0x08) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VROUNDPS") + } + return p +} + +// VROUNDSD performs "Round Scalar Double Precision Floating-Point Values". +// +// Mnemonic : VROUNDSD +// Supported forms : (2 forms) +// +// * VROUNDSD imm8, xmm, xmm, xmm [AVX] +// * VROUNDSD imm8, m64, xmm, xmm [AVX] +// +func (self *Program) VROUNDSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VROUNDSD", 4, Operands { v0, v1, v2, v3 }) + // VROUNDSD imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VROUNDSD imm8, m64, xmm, xmm + if isImm8(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0b) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VROUNDSD") + } + return p +} + +// VROUNDSS performs "Round Scalar Single Precision Floating-Point Values". 
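+//
+// Only the low single is rounded; bits 127:32 of the destination are copied
+// from the second register operand (the one encoded in VEX.vvvv). A sketch,
+// assuming `p` is a *Program from this package:
+//
+//     p.VROUNDSS(0, XMM0, XMM1, XMM2)   // XMM2[31:0]   = nearest(XMM0[31:0])
+//                                       // XMM2[127:32] = XMM1[127:32]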
+// +// Mnemonic : VROUNDSS +// Supported forms : (2 forms) +// +// * VROUNDSS imm8, xmm, xmm, xmm [AVX] +// * VROUNDSS imm8, m32, xmm, xmm [AVX] +// +func (self *Program) VROUNDSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VROUNDSS", 4, Operands { v0, v1, v2, v3 }) + // VROUNDSS imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VROUNDSS imm8, m32, xmm, xmm + if isImm8(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0a) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VROUNDSS") + } + return p +} + +// VRSQRT14PD performs "Compute Approximate Reciprocals of Square Roots of Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VRSQRT14PD +// Supported forms : (6 forms) +// +// * VRSQRT14PD m512/m64bcst, zmm{k}{z} [AVX512F] +// * VRSQRT14PD zmm, zmm{k}{z} [AVX512F] +// * VRSQRT14PD m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VRSQRT14PD m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VRSQRT14PD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VRSQRT14PD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VRSQRT14PD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VRSQRT14PD", 2, Operands { v0, v1 }) + // VRSQRT14PD m512/m64bcst, zmm{k}{z} + if isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VRSQRT14PD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRSQRT14PD m128/m64bcst, xmm{k}{z} + if isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VRSQRT14PD m256/m64bcst, ymm{k}{z} + if isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VRSQRT14PD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 
0x08) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRSQRT14PD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRT14PD") + } + return p +} + +// VRSQRT14PS performs "Compute Approximate Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VRSQRT14PS +// Supported forms : (6 forms) +// +// * VRSQRT14PS m512/m32bcst, zmm{k}{z} [AVX512F] +// * VRSQRT14PS zmm, zmm{k}{z} [AVX512F] +// * VRSQRT14PS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VRSQRT14PS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VRSQRT14PS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VRSQRT14PS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VRSQRT14PS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VRSQRT14PS", 2, Operands { v0, v1 }) + // VRSQRT14PS m512/m32bcst, zmm{k}{z} + if isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VRSQRT14PS zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRSQRT14PS m128/m32bcst, xmm{k}{z} + if isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VRSQRT14PS m256/m32bcst, ymm{k}{z} + if isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VRSQRT14PS xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRSQRT14PS ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRT14PS") + } + 
return p +} + +// VRSQRT14SD performs "Compute Approximate Reciprocal of a Square Root of a Scalar Double-Precision Floating-Point Value". +// +// Mnemonic : VRSQRT14SD +// Supported forms : (2 forms) +// +// * VRSQRT14SD xmm, xmm, xmm{k}{z} [AVX512F] +// * VRSQRT14SD m64, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VRSQRT14SD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VRSQRT14SD", 3, Operands { v0, v1, v2 }) + // VRSQRT14SD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x4f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VRSQRT14SD m64, xmm, xmm{k}{z} + if isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x4f) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRT14SD") + } + return p +} + +// VRSQRT14SS performs "Compute Approximate Reciprocal of a Square Root of a Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : VRSQRT14SS +// Supported forms : (2 forms) +// +// * VRSQRT14SS xmm, xmm, xmm{k}{z} [AVX512F] +// * VRSQRT14SS m32, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VRSQRT14SS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VRSQRT14SS", 3, Operands { v0, v1, v2 }) + // VRSQRT14SS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x4f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VRSQRT14SS m32, xmm, xmm{k}{z} + if isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x4f) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRT14SS") + } + return p +} + +// VRSQRT28PD performs "Approximation to the Reciprocal Square Root of Packed Double-Precision Floating-Point Values with Less Than 2^-28 Relative Error". 
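+//
+// AVX512ER only ever shipped on the Xeon Phi line (Knights Landing and
+// Knights Mill), so callers should gate on CPUID at runtime. A sketch,
+// assuming `p` is a *Program from this package (masking elided):
+//
+//     p.VRSQRT28PD(ZMM0, ZMM1)   // ZMM1 ~= 1/sqrt(ZMM0) per lane,
+//                                // |relative error| < 2**-28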
+// +// Mnemonic : VRSQRT28PD +// Supported forms : (3 forms) +// +// * VRSQRT28PD m512/m64bcst, zmm{k}{z} [AVX512ER] +// * VRSQRT28PD {sae}, zmm, zmm{k}{z} [AVX512ER] +// * VRSQRT28PD zmm, zmm{k}{z} [AVX512ER] +// +func (self *Program) VRSQRT28PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRSQRT28PD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VRSQRT28PD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VRSQRT28PD takes 2 or 3 operands") + } + // VRSQRT28PD m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xcc) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VRSQRT28PD {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0xcc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VRSQRT28PD zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xcc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRT28PD") + } + return p +} + +// VRSQRT28PS performs "Approximation to the Reciprocal Square Root of Packed Single-Precision Floating-Point Values with Less Than 2^-28 Relative Error". 
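+//
+// The {sae} form suppresses all floating-point exceptions for this one
+// instruction; in the encodings below this is the EVEX.b bit, visible as
+// the 0x18 (sae) versus 0x48 (plain zmm) byte.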
+// +// Mnemonic : VRSQRT28PS +// Supported forms : (3 forms) +// +// * VRSQRT28PS m512/m32bcst, zmm{k}{z} [AVX512ER] +// * VRSQRT28PS {sae}, zmm, zmm{k}{z} [AVX512ER] +// * VRSQRT28PS zmm, zmm{k}{z} [AVX512ER] +// +func (self *Program) VRSQRT28PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRSQRT28PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VRSQRT28PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VRSQRT28PS takes 2 or 3 operands") + } + // VRSQRT28PS m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xcc) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VRSQRT28PS {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0xcc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VRSQRT28PS zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xcc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRT28PS") + } + return p +} + +// VRSQRT28SD performs "Approximation to the Reciprocal Square Root of a Scalar Double-Precision Floating-Point Value with Less Than 2^-28 Relative Error". 
+// +// Mnemonic : VRSQRT28SD +// Supported forms : (3 forms) +// +// * VRSQRT28SD m64, xmm, xmm{k}{z} [AVX512ER] +// * VRSQRT28SD {sae}, xmm, xmm, xmm{k}{z} [AVX512ER] +// * VRSQRT28SD xmm, xmm, xmm{k}{z} [AVX512ER] +// +func (self *Program) VRSQRT28SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRSQRT28SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VRSQRT28SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VRSQRT28SD takes 3 or 4 operands") + } + // VRSQRT28SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xcd) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VRSQRT28SD {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xcd) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VRSQRT28SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xcd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRT28SD") + } + return p +} + +// VRSQRT28SS performs "Approximation to the Reciprocal Square Root of a Scalar Single-Precision Floating-Point Value with Less Than 2^-28 Relative Error". 
+// +// Mnemonic : VRSQRT28SS +// Supported forms : (3 forms) +// +// * VRSQRT28SS m32, xmm, xmm{k}{z} [AVX512ER] +// * VRSQRT28SS {sae}, xmm, xmm, xmm{k}{z} [AVX512ER] +// * VRSQRT28SS xmm, xmm, xmm{k}{z} [AVX512ER] +// +func (self *Program) VRSQRT28SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRSQRT28SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VRSQRT28SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VRSQRT28SS takes 3 or 4 operands") + } + // VRSQRT28SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xcd) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VRSQRT28SS {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xcd) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VRSQRT28SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xcd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRT28SS") + } + return p +} + +// VRSQRTPS performs "Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values". 
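+//
+// This is the legacy AVX approximation with |relative error| <= 1.5 * 2**-12;
+// follow it with a Newton-Raphson step where more precision is required.
+// A sketch, assuming `p` is a *Program from this package:
+//
+//     p.VRSQRTPS(YMM0, YMM1)   // YMM1 ~= 1/sqrt(YMM0), per lane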
+// +// Mnemonic : VRSQRTPS +// Supported forms : (4 forms) +// +// * VRSQRTPS xmm, xmm [AVX] +// * VRSQRTPS m128, xmm [AVX] +// * VRSQRTPS ymm, ymm [AVX] +// * VRSQRTPS m256, ymm [AVX] +// +func (self *Program) VRSQRTPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VRSQRTPS", 2, Operands { v0, v1 }) + // VRSQRTPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x52) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRSQRTPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), addr(v[0]), 0) + m.emit(0x52) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VRSQRTPS ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), v[0], 0) + m.emit(0x52) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRSQRTPS m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), addr(v[0]), 0) + m.emit(0x52) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRTPS") + } + return p +} + +// VRSQRTSS performs "Compute Reciprocal of Square Root of Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : VRSQRTSS +// Supported forms : (2 forms) +// +// * VRSQRTSS xmm, xmm, xmm [AVX] +// * VRSQRTSS m32, xmm, xmm [AVX] +// +func (self *Program) VRSQRTSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VRSQRTSS", 3, Operands { v0, v1, v2 }) + // VRSQRTSS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x52) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VRSQRTSS m32, xmm, xmm + if isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x52) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRTSS") + } + return p +} + +// VSCALEFPD performs "Scale Packed Double-Precision Floating-Point Values With Double-Precision Floating-Point Values". 
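+//
+// Per element the operation is C = B * 2**floor(A), with A the first
+// (exponent) operand, B the second and C the destination, so, as a sketch
+// with `p` assumed to be a *Program from this package:
+//
+//     p.VSCALEFPD(ZMM0, ZMM1, ZMM2)   // ZMM2 = ZMM1 * 2**floor(ZMM0)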
+// +// Mnemonic : VSCALEFPD +// Supported forms : (7 forms) +// +// * VSCALEFPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VSCALEFPD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSCALEFPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VSCALEFPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSCALEFPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSCALEFPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSCALEFPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSCALEFPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSCALEFPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSCALEFPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSCALEFPD takes 3 or 4 operands") + } + // VSCALEFPD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VSCALEFPD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSCALEFPD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSCALEFPD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2c) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VSCALEFPD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSCALEFPD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VSCALEFPD ymm, ymm, ymm{k}{z} + if 
len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSCALEFPD") + } + return p +} + +// VSCALEFPS performs "Scale Packed Single-Precision Floating-Point Values With Single-Precision Floating-Point Values". +// +// Mnemonic : VSCALEFPS +// Supported forms : (7 forms) +// +// * VSCALEFPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VSCALEFPS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSCALEFPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VSCALEFPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSCALEFPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSCALEFPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSCALEFPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSCALEFPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSCALEFPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSCALEFPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSCALEFPS takes 3 or 4 operands") + } + // VSCALEFPS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VSCALEFPS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSCALEFPS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSCALEFPS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2c) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VSCALEFPS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | 
(ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSCALEFPS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VSCALEFPS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSCALEFPS") + } + return p +} + +// VSCALEFSD performs "Scale Scalar Double-Precision Floating-Point Value With a Double-Precision Floating-Point Value". +// +// Mnemonic : VSCALEFSD +// Supported forms : (3 forms) +// +// * VSCALEFSD m64, xmm, xmm{k}{z} [AVX512F] +// * VSCALEFSD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VSCALEFSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VSCALEFSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSCALEFSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSCALEFSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSCALEFSD takes 3 or 4 operands") + } + // VSCALEFSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x2d) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VSCALEFSD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSCALEFSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSCALEFSD") + } + return p +} + +// VSCALEFSS performs "Scale Scalar Single-Precision Floating-Point Value With a Single-Precision Floating-Point Value". 
+// +// Mnemonic : VSCALEFSS +// Supported forms : (3 forms) +// +// * VSCALEFSS m32, xmm, xmm{k}{z} [AVX512F] +// * VSCALEFSS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VSCALEFSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VSCALEFSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSCALEFSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSCALEFSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSCALEFSS takes 3 or 4 operands") + } + // VSCALEFSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x2d) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VSCALEFSS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSCALEFSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSCALEFSS") + } + return p +} + +// VSCATTERDPD performs "Scatter Packed Double-Precision Floating-Point Values with Signed Doubleword Indices". 
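+//
+// Here vm32y{k} is a vector memory operand: a base register plus a ymm index
+// register holding eight signed doubleword indices, under a mandatory write
+// mask k. The mask doubles as a completion mask: each bit is cleared once
+// its element has been stored, so the instruction can resume after a fault.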
+// +// Mnemonic : VSCATTERDPD +// Supported forms : (3 forms) +// +// * VSCATTERDPD zmm, vm32y{k} [AVX512F] +// * VSCATTERDPD xmm, vm32x{k} [AVX512F,AVX512VL] +// * VSCATTERDPD ymm, vm32x{k} [AVX512F,AVX512VL] +// +func (self *Program) VSCATTERDPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VSCATTERDPD", 2, Operands { v0, v1 }) + // VSCATTERDPD zmm, vm32y{k} + if isZMM(v0) && isVMYk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa2) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VSCATTERDPD xmm, vm32x{k} + if isEVEXXMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa2) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VSCATTERDPD ymm, vm32x{k} + if isEVEXYMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa2) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERDPD") + } + return p +} + +// VSCATTERDPS performs "Scatter Packed Single-Precision Floating-Point Values with Signed Doubleword Indices". +// +// Mnemonic : VSCATTERDPS +// Supported forms : (3 forms) +// +// * VSCATTERDPS zmm, vm32z{k} [AVX512F] +// * VSCATTERDPS xmm, vm32x{k} [AVX512F,AVX512VL] +// * VSCATTERDPS ymm, vm32y{k} [AVX512F,AVX512VL] +// +func (self *Program) VSCATTERDPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VSCATTERDPS", 2, Operands { v0, v1 }) + // VSCATTERDPS zmm, vm32z{k} + if isZMM(v0) && isVMZk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa2) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VSCATTERDPS xmm, vm32x{k} + if isEVEXXMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa2) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VSCATTERDPS ymm, vm32y{k} + if isEVEXYMM(v0) && isVMYk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa2) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERDPS") + } + return p +} + +// VSCATTERPF0DPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Doubleword Indices Using T0 Hint with Intent to Write". 
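+//
+// The PF0 prefetches use the T0 hint (fetch into all cache levels) and mark
+// the lines for an upcoming write; the PF1 variants below use the T1 hint
+// (fetch into the second cache level and higher). Like AVX512ER, AVX512PF
+// only ever shipped on Xeon Phi parts.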
+// +// Mnemonic : VSCATTERPF0DPD +// Supported forms : (1 form) +// +// * VSCATTERPF0DPD vm32y{k} [AVX512PF] +// +func (self *Program) VSCATTERPF0DPD(v0 interface{}) *Instruction { + p := self.alloc("VSCATTERPF0DPD", 1, Operands { v0 }) + // VSCATTERPF0DPD vm32y{k} + if isVMYk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc6) + m.mrsd(5, addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERPF0DPD") + } + return p +} + +// VSCATTERPF0DPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Doubleword Indices Using T0 Hint with Intent to Write". +// +// Mnemonic : VSCATTERPF0DPS +// Supported forms : (1 form) +// +// * VSCATTERPF0DPS vm32z{k} [AVX512PF] +// +func (self *Program) VSCATTERPF0DPS(v0 interface{}) *Instruction { + p := self.alloc("VSCATTERPF0DPS", 1, Operands { v0 }) + // VSCATTERPF0DPS vm32z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc6) + m.mrsd(5, addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERPF0DPS") + } + return p +} + +// VSCATTERPF0QPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Quadword Indices Using T0 Hint with Intent to Write". +// +// Mnemonic : VSCATTERPF0QPD +// Supported forms : (1 form) +// +// * VSCATTERPF0QPD vm64z{k} [AVX512PF] +// +func (self *Program) VSCATTERPF0QPD(v0 interface{}) *Instruction { + p := self.alloc("VSCATTERPF0QPD", 1, Operands { v0 }) + // VSCATTERPF0QPD vm64z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc7) + m.mrsd(5, addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERPF0QPD") + } + return p +} + +// VSCATTERPF0QPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Quadword Indices Using T0 Hint with Intent to Write". +// +// Mnemonic : VSCATTERPF0QPS +// Supported forms : (1 form) +// +// * VSCATTERPF0QPS vm64z{k} [AVX512PF] +// +func (self *Program) VSCATTERPF0QPS(v0 interface{}) *Instruction { + p := self.alloc("VSCATTERPF0QPS", 1, Operands { v0 }) + // VSCATTERPF0QPS vm64z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc7) + m.mrsd(5, addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERPF0QPS") + } + return p +} + +// VSCATTERPF1DPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Doubleword Indices Using T1 Hint with Intent to Write". 
+// +// Mnemonic : VSCATTERPF1DPD +// Supported forms : (1 form) +// +// * VSCATTERPF1DPD vm32y{k} [AVX512PF] +// +func (self *Program) VSCATTERPF1DPD(v0 interface{}) *Instruction { + p := self.alloc("VSCATTERPF1DPD", 1, Operands { v0 }) + // VSCATTERPF1DPD vm32y{k} + if isVMYk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc6) + m.mrsd(6, addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERPF1DPD") + } + return p +} + +// VSCATTERPF1DPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Doubleword Indices Using T1 Hint with Intent to Write". +// +// Mnemonic : VSCATTERPF1DPS +// Supported forms : (1 form) +// +// * VSCATTERPF1DPS vm32z{k} [AVX512PF] +// +func (self *Program) VSCATTERPF1DPS(v0 interface{}) *Instruction { + p := self.alloc("VSCATTERPF1DPS", 1, Operands { v0 }) + // VSCATTERPF1DPS vm32z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc6) + m.mrsd(6, addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERPF1DPS") + } + return p +} + +// VSCATTERPF1QPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Quadword Indices Using T1 Hint with Intent to Write". +// +// Mnemonic : VSCATTERPF1QPD +// Supported forms : (1 form) +// +// * VSCATTERPF1QPD vm64z{k} [AVX512PF] +// +func (self *Program) VSCATTERPF1QPD(v0 interface{}) *Instruction { + p := self.alloc("VSCATTERPF1QPD", 1, Operands { v0 }) + // VSCATTERPF1QPD vm64z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc7) + m.mrsd(6, addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERPF1QPD") + } + return p +} + +// VSCATTERPF1QPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Quadword Indices Using T1 Hint with Intent to Write". +// +// Mnemonic : VSCATTERPF1QPS +// Supported forms : (1 form) +// +// * VSCATTERPF1QPS vm64z{k} [AVX512PF] +// +func (self *Program) VSCATTERPF1QPS(v0 interface{}) *Instruction { + p := self.alloc("VSCATTERPF1QPS", 1, Operands { v0 }) + // VSCATTERPF1QPS vm64z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc7) + m.mrsd(6, addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERPF1QPS") + } + return p +} + +// VSCATTERQPD performs "Scatter Packed Double-Precision Floating-Point Values with Signed Quadword Indices". 
+// +// Mnemonic : VSCATTERQPD +// Supported forms : (3 forms) +// +// * VSCATTERQPD zmm, vm64z{k} [AVX512F] +// * VSCATTERQPD xmm, vm64x{k} [AVX512F,AVX512VL] +// * VSCATTERQPD ymm, vm64y{k} [AVX512F,AVX512VL] +// +func (self *Program) VSCATTERQPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VSCATTERQPD", 2, Operands { v0, v1 }) + // VSCATTERQPD zmm, vm64z{k} + if isZMM(v0) && isVMZk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VSCATTERQPD xmm, vm64x{k} + if isEVEXXMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VSCATTERQPD ymm, vm64y{k} + if isEVEXYMM(v0) && isVMYk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERQPD") + } + return p +} + +// VSCATTERQPS performs "Scatter Packed Single-Precision Floating-Point Values with Signed Quadword Indices". +// +// Mnemonic : VSCATTERQPS +// Supported forms : (3 forms) +// +// * VSCATTERQPS ymm, vm64z{k} [AVX512F] +// * VSCATTERQPS xmm, vm64x{k} [AVX512F,AVX512VL] +// * VSCATTERQPS xmm, vm64y{k} [AVX512F,AVX512VL] +// +func (self *Program) VSCATTERQPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VSCATTERQPS", 2, Operands { v0, v1 }) + // VSCATTERQPS ymm, vm64z{k} + if isEVEXYMM(v0) && isVMZk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VSCATTERQPS xmm, vm64x{k} + if isEVEXXMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VSCATTERQPS xmm, vm64y{k} + if isEVEXXMM(v0) && isVMYk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERQPS") + } + return p +} + +// VSHUFF32X4 performs "Shuffle 128-Bit Packed Single-Precision Floating-Point Values". 
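+//
+// The imm8 selects whole 128-bit blocks: in the 512-bit forms each of its
+// four 2-bit fields picks one of four blocks, destination blocks 0-1 coming
+// from the middle (vvvv) register and blocks 2-3 from the first operand;
+// the 256-bit forms use only bits 0 and 1. A sketch, with `p` assumed to be
+// a *Program from this package:
+//
+//     p.VSHUFF32X4(0x4e, ZMM0, ZMM1, ZMM2)   // ZMM2 = ZMM1 blocks {2,3},
+//                                            // then ZMM0 blocks {0,1}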
+// +// Mnemonic : VSHUFF32X4 +// Supported forms : (4 forms) +// +// * VSHUFF32X4 imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VSHUFF32X4 imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSHUFF32X4 imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSHUFF32X4 imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSHUFF32X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VSHUFF32X4", 4, Operands { v0, v1, v2, v3 }) + // VSHUFF32X4 imm8, m512/m32bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x23) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFF32X4 imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x23) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFF32X4 imm8, m256/m32bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x23) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFF32X4 imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x23) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSHUFF32X4") + } + return p +} + +// VSHUFF64X2 performs "Shuffle 128-Bit Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VSHUFF64X2 +// Supported forms : (4 forms) +// +// * VSHUFF64X2 imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VSHUFF64X2 imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSHUFF64X2 imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSHUFF64X2 imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSHUFF64X2(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VSHUFF64X2", 4, Operands { v0, v1, v2, v3 }) + // VSHUFF64X2 imm8, m512/m64bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x23) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFF64X2 imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x23) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFF64X2 imm8, m256/m64bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x23) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFF64X2 imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x23) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSHUFF64X2") + } + return p +} + +// VSHUFI32X4 performs "Shuffle 128-Bit Packed Doubleword Integer Values". 
+// +// Mnemonic : VSHUFI32X4 +// Supported forms : (4 forms) +// +// * VSHUFI32X4 imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VSHUFI32X4 imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSHUFI32X4 imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSHUFI32X4 imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSHUFI32X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VSHUFI32X4", 4, Operands { v0, v1, v2, v3 }) + // VSHUFI32X4 imm8, m512/m32bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x43) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFI32X4 imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x43) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFI32X4 imm8, m256/m32bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x43) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFI32X4 imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x43) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSHUFI32X4") + } + return p +} + +// VSHUFI64X2 performs "Shuffle 128-Bit Packed Quadword Integer Values". 
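+//
+// The quadword variant differs from VSHUFI32X4 only in EVEX.W; compare the
+// 0x85/0xfd prefix bytes below with VSHUFI32X4's 0x05/0x7d. Sketch:
+//
+//     p.VSHUFI64X2(0b00_01_10_11, ZMM2, ZMM1, ZMM0)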
+// +// Mnemonic : VSHUFI64X2 +// Supported forms : (4 forms) +// +// * VSHUFI64X2 imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VSHUFI64X2 imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSHUFI64X2 imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSHUFI64X2 imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSHUFI64X2(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VSHUFI64X2", 4, Operands { v0, v1, v2, v3 }) + // VSHUFI64X2 imm8, m512/m64bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x43) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFI64X2 imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x43) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFI64X2 imm8, m256/m64bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x43) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFI64X2 imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x43) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSHUFI64X2") + } + return p +} + +// VSHUFPD performs "Shuffle Packed Double-Precision Floating-Point Values". 
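+//
+// Form selection is driven purely by operand types: plain xmm/ymm operands
+// pick the VEX-encoded AVX forms below, while EVEX-only registers or masked
+// destinations pick the AVX-512 forms. Sketch of the simplest form:
+//
+//     p.VSHUFPD(0b01, XMM1, XMM0, XMM2)   // VEX.128 encoding, requires ISA_AVX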
+// +// Mnemonic : VSHUFPD +// Supported forms : (10 forms) +// +// * VSHUFPD imm8, xmm, xmm, xmm [AVX] +// * VSHUFPD imm8, m128, xmm, xmm [AVX] +// * VSHUFPD imm8, ymm, ymm, ymm [AVX] +// * VSHUFPD imm8, m256, ymm, ymm [AVX] +// * VSHUFPD imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VSHUFPD imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSHUFPD imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSHUFPD imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSHUFPD imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSHUFPD imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSHUFPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VSHUFPD", 4, Operands { v0, v1, v2, v3 }) + // VSHUFPD imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, m512/m64bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, m128/m64bcst, xmm, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | 
ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, m256/m64bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSHUFPD") + } + return p +} + +// VSHUFPS performs "Shuffle Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VSHUFPS +// Supported forms : (10 forms) +// +// * VSHUFPS imm8, xmm, xmm, xmm [AVX] +// * VSHUFPS imm8, m128, xmm, xmm [AVX] +// * VSHUFPS imm8, ymm, ymm, ymm [AVX] +// * VSHUFPS imm8, m256, ymm, ymm [AVX] +// * VSHUFPS imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VSHUFPS imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSHUFPS imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSHUFPS imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSHUFPS imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSHUFPS imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSHUFPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VSHUFPS", 4, Operands { v0, v1, v2, v3 }) + // VSHUFPS imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + 
m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, m512/m32bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, m128/m32bcst, xmm, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, m256/m32bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSHUFPS") + } + return p +} + +// VSQRTPD performs "Compute Square Roots of Packed Double-Precision Floating-Point Values". 
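+//
+// Unlike the fixed-arity shuffles above, this method is variadic: it takes
+// two operands normally, or three when a leading {er} rounding-control
+// operand is supplied (see the arity switch below). Sketch:
+//
+//     p.VSQRTPD(ZMM1, ZMM0)   // ZMM0 = element-wise sqrt of ZMM1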
+//
+// Mnemonic : VSQRTPD
+// Supported forms : (11 forms)
+//
+// * VSQRTPD xmm, xmm [AVX]
+// * VSQRTPD m128, xmm [AVX]
+// * VSQRTPD ymm, ymm [AVX]
+// * VSQRTPD m256, ymm [AVX]
+// * VSQRTPD m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VSQRTPD {er}, zmm, zmm{k}{z} [AVX512F]
+// * VSQRTPD zmm, zmm{k}{z} [AVX512F]
+// * VSQRTPD m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSQRTPD m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VSQRTPD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSQRTPD ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VSQRTPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VSQRTPD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VSQRTPD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VSQRTPD takes 2 or 3 operands")
+ }
+ // VSQRTPD xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), v[0], 0)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSQRTPD m128, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VSQRTPD ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), v[0], 0)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSQRTPD m256, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VSQRTPD m512/m64bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VSQRTPD {er}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VSQRTPD zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSQRTPD m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VSQRTPD m256/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VSQRTPD xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSQRTPD ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSQRTPD")
+ }
+ return p
+}
+
+// VSQRTPS performs "Compute Square Roots of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VSQRTPS
+// Supported forms : (11 forms)
+//
+// * VSQRTPS xmm, xmm [AVX]
+// * VSQRTPS m128, xmm [AVX]
+// * VSQRTPS ymm, ymm [AVX]
+// * VSQRTPS m256, ymm [AVX]
+// * VSQRTPS m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VSQRTPS {er}, zmm, zmm{k}{z} [AVX512F]
+// * VSQRTPS zmm, zmm{k}{z} [AVX512F]
+// * VSQRTPS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSQRTPS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VSQRTPS xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSQRTPS ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VSQRTPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VSQRTPS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VSQRTPS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VSQRTPS takes 2 or 3 operands")
+ }
+ // VSQRTPS xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), v[0], 0)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSQRTPS m128, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VSQRTPS ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), v[0], 0)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSQRTPS m256, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VSQRTPS m512/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]),
zcode(v[1]), bcode(v[0])) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VSQRTPS {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x51) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VSQRTPS zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VSQRTPS m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VSQRTPS m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VSQRTPS xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VSQRTPS ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSQRTPS") + } + return p +} + +// VSQRTSD performs "Compute Square Root of Scalar Double-Precision Floating-Point Value". 
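+//
+// Scalar sketch; per the SDM only the low double of the destination is
+// computed, the upper bits coming from the second operand:
+//
+//     p.VSQRTSD(XMM2, XMM1, XMM0)   // XMM0.lo = sqrt(XMM2.lo), rest from XMM1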
+// +// Mnemonic : VSQRTSD +// Supported forms : (5 forms) +// +// * VSQRTSD xmm, xmm, xmm [AVX] +// * VSQRTSD m64, xmm, xmm [AVX] +// * VSQRTSD m64, xmm, xmm{k}{z} [AVX512F] +// * VSQRTSD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VSQRTSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VSQRTSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSQRTSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSQRTSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSQRTSD takes 3 or 4 operands") + } + // VSQRTSD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x51) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSQRTSD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x51) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VSQRTSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x51) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VSQRTSD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x51) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSQRTSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x51) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSQRTSD") + } + return p +} + +// VSQRTSS performs "Compute Square Root of Scalar Single-Precision Floating-Point Value". 
+// +// Mnemonic : VSQRTSS +// Supported forms : (5 forms) +// +// * VSQRTSS xmm, xmm, xmm [AVX] +// * VSQRTSS m32, xmm, xmm [AVX] +// * VSQRTSS m32, xmm, xmm{k}{z} [AVX512F] +// * VSQRTSS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VSQRTSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VSQRTSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSQRTSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSQRTSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSQRTSS takes 3 or 4 operands") + } + // VSQRTSS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x51) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSQRTSS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x51) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VSQRTSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x51) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VSQRTSS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x51) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSQRTSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x51) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSQRTSS") + } + return p +} + +// VSTMXCSR performs "Store MXCSR Register State". +// +// Mnemonic : VSTMXCSR +// Supported forms : (1 form) +// +// * VSTMXCSR m32 [AVX] +// +func (self *Program) VSTMXCSR(v0 interface{}) *Instruction { + p := self.alloc("VSTMXCSR", 1, Operands { v0 }) + // VSTMXCSR m32 + if isM32(v0) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, addr(v[0]), 0) + m.emit(0xae) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VSTMXCSR") + } + return p +} + +// VSUBPD performs "Subtract Packed Double-Precision Floating-Point Values". 
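+//
+// With the AT&T-style operand order used in this file the subtrahend comes
+// first. Sketch:
+//
+//     p.VSUBPD(YMM0, YMM1, YMM2)   // YMM2 = YMM1 - YMM0, element-wise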
+// +// Mnemonic : VSUBPD +// Supported forms : (11 forms) +// +// * VSUBPD xmm, xmm, xmm [AVX] +// * VSUBPD m128, xmm, xmm [AVX] +// * VSUBPD ymm, ymm, ymm [AVX] +// * VSUBPD m256, ymm, ymm [AVX] +// * VSUBPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VSUBPD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSUBPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VSUBPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSUBPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSUBPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSUBPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSUBPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSUBPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSUBPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSUBPD takes 3 or 4 operands") + } + // VSUBPD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBPD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VSUBPD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBPD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VSUBPD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VSUBPD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSUBPD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBPD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && 
isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VSUBPD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBPD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VSUBPD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSUBPD") + } + return p +} + +// VSUBPS performs "Subtract Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VSUBPS +// Supported forms : (11 forms) +// +// * VSUBPS xmm, xmm, xmm [AVX] +// * VSUBPS m128, xmm, xmm [AVX] +// * VSUBPS ymm, ymm, ymm [AVX] +// * VSUBPS m256, ymm, ymm [AVX] +// * VSUBPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VSUBPS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSUBPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VSUBPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSUBPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSUBPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSUBPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSUBPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSUBPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSUBPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSUBPS takes 3 or 4 operands") + } + // VSUBPS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBPS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VSUBPS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBPS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VSUBPS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VSUBPS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSUBPS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBPS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && 
isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VSUBPS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBPS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VSUBPS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSUBPS") + } + return p +} + +// VSUBSD performs "Subtract Scalar Double-Precision Floating-Point Values". 
+// +// Mnemonic : VSUBSD +// Supported forms : (5 forms) +// +// * VSUBSD xmm, xmm, xmm [AVX] +// * VSUBSD m64, xmm, xmm [AVX] +// * VSUBSD m64, xmm, xmm{k}{z} [AVX512F] +// * VSUBSD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VSUBSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VSUBSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSUBSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSUBSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSUBSD takes 3 or 4 operands") + } + // VSUBSD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBSD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VSUBSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VSUBSD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSUBSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSUBSD") + } + return p +} + +// VSUBSS performs "Subtract Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VSUBSS +// Supported forms : (5 forms) +// +// * VSUBSS xmm, xmm, xmm [AVX] +// * VSUBSS m32, xmm, xmm [AVX] +// * VSUBSS m32, xmm, xmm{k}{z} [AVX512F] +// * VSUBSS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VSUBSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VSUBSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSUBSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSUBSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSUBSS takes 3 or 4 operands") + } + // VSUBSS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBSS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VSUBSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VSUBSS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSUBSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSUBSS") + } + return p +} + +// VTESTPD performs "Packed Double-Precision Floating-Point Bit Test". 
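+//
+// A two-operand test: no vector register is written; ZF and CF are set from
+// the packed sign bits of the two operands. Sketch:
+//
+//     p.VTESTPD(XMM1, XMM0)   // result goes to EFLAGS only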
+// +// Mnemonic : VTESTPD +// Supported forms : (4 forms) +// +// * VTESTPD xmm, xmm [AVX] +// * VTESTPD m128, xmm [AVX] +// * VTESTPD ymm, ymm [AVX] +// * VTESTPD m256, ymm [AVX] +// +func (self *Program) VTESTPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VTESTPD", 2, Operands { v0, v1 }) + // VTESTPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VTESTPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VTESTPD ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VTESTPD m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VTESTPD") + } + return p +} + +// VTESTPS performs "Packed Single-Precision Floating-Point Bit Test". +// +// Mnemonic : VTESTPS +// Supported forms : (4 forms) +// +// * VTESTPS xmm, xmm [AVX] +// * VTESTPS m128, xmm [AVX] +// * VTESTPS ymm, ymm [AVX] +// * VTESTPS m256, ymm [AVX] +// +func (self *Program) VTESTPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VTESTPS", 2, Operands { v0, v1 }) + // VTESTPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x0e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VTESTPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x0e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VTESTPS ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x0e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VTESTPS m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x0e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VTESTPS") + } + return p +} + +// VUCOMISD performs "Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS". 
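+//
+// Variadic like VSQRTPD above: a third operand is accepted only for the
+// {sae} (suppress-all-exceptions) form. Sketch:
+//
+//     p.VUCOMISD(XMM1, XMM0)   // compares the low doubles, result in EFLAGS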
+// +// Mnemonic : VUCOMISD +// Supported forms : (5 forms) +// +// * VUCOMISD xmm, xmm [AVX] +// * VUCOMISD m64, xmm [AVX] +// * VUCOMISD m64, xmm [AVX512F] +// * VUCOMISD {sae}, xmm, xmm [AVX512F] +// * VUCOMISD xmm, xmm [AVX512F] +// +func (self *Program) VUCOMISD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VUCOMISD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VUCOMISD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VUCOMISD takes 2 or 3 operands") + } + // VUCOMISD xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x2e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VUCOMISD m64, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VUCOMISD m64, xmm + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2e) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VUCOMISD {sae}, xmm, xmm + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit(0x18) + m.emit(0x2e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VUCOMISD xmm, xmm + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit(0x48) + m.emit(0x2e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VUCOMISD") + } + return p +} + +// VUCOMISS performs "Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS". 
+// +// Mnemonic : VUCOMISS +// Supported forms : (5 forms) +// +// * VUCOMISS xmm, xmm [AVX] +// * VUCOMISS m32, xmm [AVX] +// * VUCOMISS m32, xmm [AVX512F] +// * VUCOMISS {sae}, xmm, xmm [AVX512F] +// * VUCOMISS xmm, xmm [AVX512F] +// +func (self *Program) VUCOMISS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VUCOMISS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VUCOMISS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VUCOMISS takes 2 or 3 operands") + } + // VUCOMISS xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x2e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VUCOMISS m32, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VUCOMISS m32, xmm + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2e) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VUCOMISS {sae}, xmm, xmm + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c) + m.emit(0x18) + m.emit(0x2e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VUCOMISS xmm, xmm + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit(0x48) + m.emit(0x2e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VUCOMISS") + } + return p +} + +// VUNPCKHPD performs "Unpack and Interleave High Packed Double-Precision Floating-Point Values". 
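+//
+// Fixed three-operand form: the high 64-bit half of each 128-bit lane of the
+// two sources is interleaved into the destination. Sketch:
+//
+//     p.VUNPCKHPD(XMM1, XMM0, XMM2)   // XMM2 = {lo: XMM0[127:64], hi: XMM1[127:64]}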
+// +// Mnemonic : VUNPCKHPD +// Supported forms : (10 forms) +// +// * VUNPCKHPD xmm, xmm, xmm [AVX] +// * VUNPCKHPD m128, xmm, xmm [AVX] +// * VUNPCKHPD ymm, ymm, ymm [AVX] +// * VUNPCKHPD m256, ymm, ymm [AVX] +// * VUNPCKHPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VUNPCKHPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VUNPCKHPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKHPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKHPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKHPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VUNPCKHPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VUNPCKHPD", 3, Operands { v0, v1, v2 }) + // VUNPCKHPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKHPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VUNPCKHPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKHPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VUNPCKHPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VUNPCKHPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKHPD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VUNPCKHPD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // 
VUNPCKHPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VUNPCKHPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VUNPCKHPD") + } + return p +} + +// VUNPCKHPS performs "Unpack and Interleave High Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VUNPCKHPS +// Supported forms : (10 forms) +// +// * VUNPCKHPS xmm, xmm, xmm [AVX] +// * VUNPCKHPS m128, xmm, xmm [AVX] +// * VUNPCKHPS ymm, ymm, ymm [AVX] +// * VUNPCKHPS m256, ymm, ymm [AVX] +// * VUNPCKHPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VUNPCKHPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VUNPCKHPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKHPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKHPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKHPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VUNPCKHPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VUNPCKHPS", 3, Operands { v0, v1, v2 }) + // VUNPCKHPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKHPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VUNPCKHPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKHPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VUNPCKHPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VUNPCKHPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ 
(hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKHPS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VUNPCKHPS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKHPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VUNPCKHPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VUNPCKHPS") + } + return p +} + +// VUNPCKLPD performs "Unpack and Interleave Low Packed Double-Precision Floating-Point Values". 
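+//
+// Editorial sketch (not generated output): the AVX-512 forms listed below take
+// masked destinations ({k}{z}) and, for memory sources, 64-bit broadcasts
+// (m64bcst). A plain full-width call, assuming the package's ZMM register
+// constants, might look like:
+//
+//     p.VUNPCKLPD(ZMM1, ZMM2, ZMM3)   // zmm form, gated on ISA_AVX512F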
+// +// Mnemonic : VUNPCKLPD +// Supported forms : (10 forms) +// +// * VUNPCKLPD xmm, xmm, xmm [AVX] +// * VUNPCKLPD m128, xmm, xmm [AVX] +// * VUNPCKLPD ymm, ymm, ymm [AVX] +// * VUNPCKLPD m256, ymm, ymm [AVX] +// * VUNPCKLPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VUNPCKLPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VUNPCKLPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKLPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKLPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKLPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VUNPCKLPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VUNPCKLPD", 3, Operands { v0, v1, v2 }) + // VUNPCKLPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKLPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VUNPCKLPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKLPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VUNPCKLPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VUNPCKLPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKLPD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VUNPCKLPD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // 
VUNPCKLPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VUNPCKLPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VUNPCKLPD") + } + return p +} + +// VUNPCKLPS performs "Unpack and Interleave Low Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VUNPCKLPS +// Supported forms : (10 forms) +// +// * VUNPCKLPS xmm, xmm, xmm [AVX] +// * VUNPCKLPS m128, xmm, xmm [AVX] +// * VUNPCKLPS ymm, ymm, ymm [AVX] +// * VUNPCKLPS m256, ymm, ymm [AVX] +// * VUNPCKLPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VUNPCKLPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VUNPCKLPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKLPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKLPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKLPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VUNPCKLPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VUNPCKLPS", 3, Operands { v0, v1, v2 }) + // VUNPCKLPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKLPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VUNPCKLPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKLPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VUNPCKLPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VUNPCKLPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ 
(hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKLPS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VUNPCKLPS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKLPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VUNPCKLPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VUNPCKLPS") + } + return p +} + +// VXORPD performs "Bitwise Logical XOR for Double-Precision Floating-Point Values". 
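+//
+// Editorial sketch (not generated output): XORing a register with itself is
+// the usual zeroing idiom, e.g. a hypothetical
+//
+//     p.VXORPD(XMM0, XMM0, XMM0)
+//
+// picks the AVX "VXORPD xmm, xmm, xmm" form and clears XMM0 (VEX-encoded
+// instructions also zero the upper bits of the containing YMM/ZMM register).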
+// +// Mnemonic : VXORPD +// Supported forms : (10 forms) +// +// * VXORPD xmm, xmm, xmm [AVX] +// * VXORPD m128, xmm, xmm [AVX] +// * VXORPD ymm, ymm, ymm [AVX] +// * VXORPD m256, ymm, ymm [AVX] +// * VXORPD m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VXORPD zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VXORPD m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VXORPD xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VXORPD m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VXORPD ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VXORPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VXORPD", 3, Operands { v0, v1, v2 }) + // VXORPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VXORPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VXORPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VXORPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VXORPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VXORPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VXORPD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VXORPD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VXORPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) 
&& isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VXORPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VXORPD") + } + return p +} + +// VXORPS performs "Bitwise Logical XOR for Single-Precision Floating-Point Values". +// +// Mnemonic : VXORPS +// Supported forms : (10 forms) +// +// * VXORPS xmm, xmm, xmm [AVX] +// * VXORPS m128, xmm, xmm [AVX] +// * VXORPS ymm, ymm, ymm [AVX] +// * VXORPS m256, ymm, ymm [AVX] +// * VXORPS m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VXORPS zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VXORPS m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VXORPS xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VXORPS m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VXORPS ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VXORPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VXORPS", 3, Operands { v0, v1, v2 }) + // VXORPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VXORPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VXORPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VXORPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VXORPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VXORPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x57) + m.emit(0xc0 | 
lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VXORPS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VXORPS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VXORPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VXORPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VXORPS") + } + return p +} + +// VZEROALL performs "Zero All YMM Registers". +// +// Mnemonic : VZEROALL +// Supported forms : (1 form) +// +// * VZEROALL [AVX] +// +func (self *Program) VZEROALL() *Instruction { + p := self.alloc("VZEROALL", 0, Operands { }) + // VZEROALL + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, 0, nil, 0) + m.emit(0x77) + }) + return p +} + +// VZEROUPPER performs "Zero Upper Bits of YMM Registers". +// +// Mnemonic : VZEROUPPER +// Supported forms : (1 form) +// +// * VZEROUPPER [AVX] +// +func (self *Program) VZEROUPPER() *Instruction { + p := self.alloc("VZEROUPPER", 0, Operands { }) + // VZEROUPPER + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, nil, 0) + m.emit(0x77) + }) + return p +} + +// XADDB performs "Exchange and Add". 
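+//
+// Editorial sketch (not generated output): XADD stores the old destination in
+// the source and the sum in the destination, so in the AT&T operand order
+// used here a hypothetical
+//
+//     p.XADDB(AL, BL)
+//
+// leaves the old BL in AL and AL+BL in BL. On the memory form, XADD with a
+// LOCK prefix is the classic x86 fetch-and-add primitive.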
+// +// Mnemonic : XADD +// Supported forms : (2 forms) +// +// * XADDB r8, r8 +// * XADDB r8, m8 +// +func (self *Program) XADDB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XADDB", 2, Operands { v0, v1 }) + // XADDB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x0f) + m.emit(0xc0) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // XADDB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0xc0) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XADDB") + } + return p +} + +// XADDL performs "Exchange and Add". +// +// Mnemonic : XADD +// Supported forms : (2 forms) +// +// * XADDL r32, r32 +// * XADDL r32, m32 +// +func (self *Program) XADDL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XADDL", 2, Operands { v0, v1 }) + // XADDL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xc1) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // XADDL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XADDL") + } + return p +} + +// XADDQ performs "Exchange and Add". +// +// Mnemonic : XADD +// Supported forms : (2 forms) +// +// * XADDQ r64, r64 +// * XADDQ r64, m64 +// +func (self *Program) XADDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XADDQ", 2, Operands { v0, v1 }) + // XADDQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xc1) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // XADDQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0xc1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XADDQ") + } + return p +} + +// XADDW performs "Exchange and Add". +// +// Mnemonic : XADD +// Supported forms : (2 forms) +// +// * XADDW r16, r16 +// * XADDW r16, m16 +// +func (self *Program) XADDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XADDW", 2, Operands { v0, v1 }) + // XADDW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xc1) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // XADDW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XADDW") + } + return p +} + +// XCHGB performs "Exchange Register/Memory with Register". 
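+//
+// Editorial sketch (not generated output): for the register-register case the
+// encoder below records opcode 0x86 twice, once with each operand in
+// ModRM.reg, leaving the final encoding choice to a later selection pass. A
+// hypothetical byte swap:
+//
+//     p.XCHGB(AL, BL)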
+// +// Mnemonic : XCHG +// Supported forms : (3 forms) +// +// * XCHGB r8, r8 +// * XCHGB m8, r8 +// * XCHGB r8, m8 +// +func (self *Program) XCHGB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XCHGB", 2, Operands { v0, v1 }) + // XCHGB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x86) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x86) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XCHGB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x86) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // XCHGB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x86) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XCHGB") + } + return p +} + +// XCHGL performs "Exchange Register/Memory with Register". +// +// Mnemonic : XCHG +// Supported forms : (5 forms) +// +// * XCHGL r32, eax +// * XCHGL eax, r32 +// * XCHGL r32, r32 +// * XCHGL m32, r32 +// * XCHGL r32, m32 +// +func (self *Program) XCHGL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XCHGL", 2, Operands { v0, v1 }) + // XCHGL r32, eax + if isReg32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0x90 | lcode(v[0])) + }) + } + // XCHGL eax, r32 + if v0 == EAX && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x90 | lcode(v[1])) + }) + } + // XCHGL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x87) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x87) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XCHGL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x87) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // XCHGL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x87) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XCHGL") + } + return p +} + +// XCHGQ performs "Exchange Register/Memory with Register". 
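+//
+// Editorial sketch (not generated output): exchanges involving RAX get the
+// compact single-opcode form (REX.W plus 0x90|reg), as the first two cases
+// below show; a hypothetical
+//
+//     p.XCHGQ(RBX, RAX)
+//
+// can therefore encode as just 0x48 0x93 instead of the generic 0x87 form.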
+// +// Mnemonic : XCHG +// Supported forms : (5 forms) +// +// * XCHGQ r64, rax +// * XCHGQ rax, r64 +// * XCHGQ r64, r64 +// * XCHGQ m64, r64 +// * XCHGQ r64, m64 +// +func (self *Program) XCHGQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XCHGQ", 2, Operands { v0, v1 }) + // XCHGQ r64, rax + if isReg64(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0x90 | lcode(v[0])) + }) + } + // XCHGQ rax, r64 + if v0 == RAX && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x90 | lcode(v[1])) + }) + } + // XCHGQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x87) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x87) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XCHGQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x87) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // XCHGQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x87) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XCHGQ") + } + return p +} + +// XCHGW performs "Exchange Register/Memory with Register". +// +// Mnemonic : XCHG +// Supported forms : (5 forms) +// +// * XCHGW r16, ax +// * XCHGW ax, r16 +// * XCHGW r16, r16 +// * XCHGW m16, r16 +// * XCHGW r16, m16 +// +func (self *Program) XCHGW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XCHGW", 2, Operands { v0, v1 }) + // XCHGW r16, ax + if isReg16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0x90 | lcode(v[0])) + }) + } + // XCHGW ax, r16 + if v0 == AX && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x90 | lcode(v[1])) + }) + } + // XCHGW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x87) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x87) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XCHGW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x87) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // XCHGW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x87) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XCHGW") + } + return p +} + +// XGETBV performs "Get Value of Extended Control Register". 
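+//
+// Editorial sketch (not generated output): XGETBV reads the extended control
+// register selected by ECX into EDX:EAX, so ECX must be loaded first; XCR0 is
+// the usual choice when probing OS-enabled AVX state:
+//
+//     p.XORL(ECX, ECX)   // select XCR0
+//     p.XGETBV()         // EDX:EAX = XCR0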
+// +// Mnemonic : XGETBV +// Supported forms : (1 form) +// +// * XGETBV +// +func (self *Program) XGETBV() *Instruction { + p := self.alloc("XGETBV", 0, Operands { }) + // XGETBV + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x01) + m.emit(0xd0) + }) + return p +} + +// XLATB performs "Table Look-up Translation". +// +// Mnemonic : XLATB +// Supported forms : (2 forms) +// +// * XLATB +// * XLATB [REX.W] +// +func (self *Program) XLATB() *Instruction { + p := self.alloc("XLATB", 0, Operands { }) + // XLATB + p.domain = DomainMisc + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xd7) + }) + // XLATB (REX.W prefixed) + p.domain = DomainMisc + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0xd7) + }) + return p +} + +// XORB performs "Logical Exclusive OR". +// +// Mnemonic : XOR +// Supported forms : (6 forms) +// +// * XORB imm8, al +// * XORB imm8, r8 +// * XORB r8, r8 +// * XORB m8, r8 +// * XORB imm8, m8 +// * XORB r8, m8 +// +func (self *Program) XORB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XORB", 2, Operands { v0, v1 }) + // XORB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x34) + m.imm1(toImmAny(v[0])) + }) + } + // XORB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0x80) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // XORB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x30) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x32) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XORB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x32) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // XORB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x80) + m.mrsd(6, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // XORB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x30) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XORB") + } + return p +} + +// XORL performs "Logical Exclusive OR". 
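+//
+// Editorial sketch (not generated output): the self-XOR zeroing idiom maps
+// onto the "XORL r32, r32" form below:
+//
+//     p.XORL(EAX, EAX)   // EAX = 0; writing EAX also clears the upper half of RAX
+//
+// This is shorter than moving an immediate zero and is recognized as a
+// dependency-breaking zeroing idiom by modern CPUs.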
+// +// Mnemonic : XOR +// Supported forms : (8 forms) +// +// * XORL imm32, eax +// * XORL imm8, r32 +// * XORL imm32, r32 +// * XORL r32, r32 +// * XORL m32, r32 +// * XORL imm8, m32 +// * XORL imm32, m32 +// * XORL r32, m32 +// +func (self *Program) XORL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XORL", 2, Operands { v0, v1 }) + // XORL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x35) + m.imm4(toImmAny(v[0])) + }) + } + // XORL imm8, r32 + if isImm8Ext(v0, 4) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // XORL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xf0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // XORL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x31) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XORL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // XORL imm8, m32 + if isImm8Ext(v0, 4) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(6, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // XORL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(6, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // XORL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x31) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XORL") + } + return p +} + +// XORPD performs "Bitwise Logical XOR for Double-Precision Floating-Point Values". +// +// Mnemonic : XORPD +// Supported forms : (2 forms) +// +// * XORPD xmm, xmm [SSE2] +// * XORPD m128, xmm [SSE2] +// +func (self *Program) XORPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XORPD", 2, Operands { v0, v1 }) + // XORPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x57) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XORPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x57) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XORPD") + } + return p +} + +// XORPS performs "Bitwise Logical XOR for Single-Precision Floating-Point Values". 
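+//
+// Editorial sketch (not generated output): like its packed-double sibling,
+// this is commonly used to zero an XMM register with the legacy SSE encoding
+// (0x0F 0x57), e.g. a hypothetical
+//
+//     p.XORPS(XMM0, XMM0)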
+// +// Mnemonic : XORPS +// Supported forms : (2 forms) +// +// * XORPS xmm, xmm [SSE] +// * XORPS m128, xmm [SSE] +// +func (self *Program) XORPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XORPS", 2, Operands { v0, v1 }) + // XORPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x57) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XORPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x57) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XORPS") + } + return p +} + +// XORQ performs "Logical Exclusive OR". +// +// Mnemonic : XOR +// Supported forms : (8 forms) +// +// * XORQ imm32, rax +// * XORQ imm8, r64 +// * XORQ imm32, r64 +// * XORQ r64, r64 +// * XORQ m64, r64 +// * XORQ imm8, m64 +// * XORQ imm32, m64 +// * XORQ r64, m64 +// +func (self *Program) XORQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XORQ", 2, Operands { v0, v1 }) + // XORQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x35) + m.imm4(toImmAny(v[0])) + }) + } + // XORQ imm8, r64 + if isImm8Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x83) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // XORQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x81) + m.emit(0xf0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // XORQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x31) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XORQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // XORQ imm8, m64 + if isImm8Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x83) + m.mrsd(6, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // XORQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x81) + m.mrsd(6, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // XORQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x31) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XORQ") + } + return p +} + +// XORW performs "Logical Exclusive OR". 
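+//
+// Editorial sketch (not generated output): the 16-bit variant is the same XOR
+// opcode behind the 0x66 operand-size prefix that every encoder below emits;
+// assuming rexo emits nothing when no REX bits are required, a hypothetical
+//
+//     p.XORW(AX, AX)
+//
+// would assemble to 0x66 0x31 0xC0.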
+// +// Mnemonic : XOR +// Supported forms : (8 forms) +// +// * XORW imm16, ax +// * XORW imm8, r16 +// * XORW imm16, r16 +// * XORW r16, r16 +// * XORW m16, r16 +// * XORW imm8, m16 +// * XORW imm16, m16 +// * XORW r16, m16 +// +func (self *Program) XORW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XORW", 2, Operands { v0, v1 }) + // XORW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x35) + m.imm2(toImmAny(v[0])) + }) + } + // XORW imm8, r16 + if isImm8Ext(v0, 2) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // XORW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xf0 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // XORW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x31) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XORW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // XORW imm8, m16 + if isImm8Ext(v0, 2) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(6, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // XORW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(6, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // XORW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x31) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XORW") + } + return p +} diff --git a/vendor/github.com/cloudwego/iasm/x86_64/instructions_table.go b/vendor/github.com/cloudwego/iasm/x86_64/instructions_table.go new file mode 100644 index 000000000..dfb652ae3 --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/x86_64/instructions_table.go @@ -0,0 +1,12307 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Code generated by "mkasm_amd64.py", DO NOT EDIT. 
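+
+// Editorial sketch (not generated output): the Instructions table defined in
+// this file keys lowercase mnemonics to proxy encoders, which allows
+// assembling from textual instruction names; a hypothetical lookup:
+//
+//     // enc, ok := Instructions["xorl"]
+//     // if ok { ... dispatch through enc ... }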
+ +package x86_64 + +const ( + _N_args = 5 + _N_forms = 23 +) + +// Instructions maps each instruction name to its encoder function. +var Instructions = map[string]_InstructionEncoder { + "adcb" : __asm_proxy_ADCB__, + "adcl" : __asm_proxy_ADCL__, + "adcq" : __asm_proxy_ADCQ__, + "adcw" : __asm_proxy_ADCW__, + "adcxl" : __asm_proxy_ADCXL__, + "adcxq" : __asm_proxy_ADCXQ__, + "addb" : __asm_proxy_ADDB__, + "addl" : __asm_proxy_ADDL__, + "addpd" : __asm_proxy_ADDPD__, + "addps" : __asm_proxy_ADDPS__, + "addq" : __asm_proxy_ADDQ__, + "addsd" : __asm_proxy_ADDSD__, + "addss" : __asm_proxy_ADDSS__, + "addsubpd" : __asm_proxy_ADDSUBPD__, + "addsubps" : __asm_proxy_ADDSUBPS__, + "addw" : __asm_proxy_ADDW__, + "adoxl" : __asm_proxy_ADOXL__, + "adoxq" : __asm_proxy_ADOXQ__, + "aesdec" : __asm_proxy_AESDEC__, + "aesdeclast" : __asm_proxy_AESDECLAST__, + "aesenc" : __asm_proxy_AESENC__, + "aesenclast" : __asm_proxy_AESENCLAST__, + "aesimc" : __asm_proxy_AESIMC__, + "aeskeygenassist" : __asm_proxy_AESKEYGENASSIST__, + "andb" : __asm_proxy_ANDB__, + "andl" : __asm_proxy_ANDL__, + "andnl" : __asm_proxy_ANDNL__, + "andnpd" : __asm_proxy_ANDNPD__, + "andnps" : __asm_proxy_ANDNPS__, + "andnq" : __asm_proxy_ANDNQ__, + "andpd" : __asm_proxy_ANDPD__, + "andps" : __asm_proxy_ANDPS__, + "andq" : __asm_proxy_ANDQ__, + "andw" : __asm_proxy_ANDW__, + "bextr" : __asm_proxy_BEXTR__, + "blcfill" : __asm_proxy_BLCFILL__, + "blci" : __asm_proxy_BLCI__, + "blcic" : __asm_proxy_BLCIC__, + "blcmsk" : __asm_proxy_BLCMSK__, + "blcs" : __asm_proxy_BLCS__, + "blendpd" : __asm_proxy_BLENDPD__, + "blendps" : __asm_proxy_BLENDPS__, + "blendvpd" : __asm_proxy_BLENDVPD__, + "blendvps" : __asm_proxy_BLENDVPS__, + "blsfill" : __asm_proxy_BLSFILL__, + "blsi" : __asm_proxy_BLSI__, + "blsic" : __asm_proxy_BLSIC__, + "blsmsk" : __asm_proxy_BLSMSK__, + "blsr" : __asm_proxy_BLSR__, + "bsfl" : __asm_proxy_BSFL__, + "bsfq" : __asm_proxy_BSFQ__, + "bsfw" : __asm_proxy_BSFW__, + "bsrl" : __asm_proxy_BSRL__, + "bsrq" : __asm_proxy_BSRQ__, + "bsrw" : __asm_proxy_BSRW__, + "bswapl" : __asm_proxy_BSWAPL__, + "bswapq" : __asm_proxy_BSWAPQ__, + "btcl" : __asm_proxy_BTCL__, + "btcq" : __asm_proxy_BTCQ__, + "btcw" : __asm_proxy_BTCW__, + "btl" : __asm_proxy_BTL__, + "btq" : __asm_proxy_BTQ__, + "btrl" : __asm_proxy_BTRL__, + "btrq" : __asm_proxy_BTRQ__, + "btrw" : __asm_proxy_BTRW__, + "btsl" : __asm_proxy_BTSL__, + "btsq" : __asm_proxy_BTSQ__, + "btsw" : __asm_proxy_BTSW__, + "btw" : __asm_proxy_BTW__, + "bzhi" : __asm_proxy_BZHI__, + "call" : __asm_proxy_CALL__, + "callq" : __asm_proxy_CALLQ__, + "cbtw" : __asm_proxy_CBTW__, + "clc" : __asm_proxy_CLC__, + "cld" : __asm_proxy_CLD__, + "clflush" : __asm_proxy_CLFLUSH__, + "clflushopt" : __asm_proxy_CLFLUSHOPT__, + "cltd" : __asm_proxy_CLTD__, + "cltq" : __asm_proxy_CLTQ__, + "clwb" : __asm_proxy_CLWB__, + "clzero" : __asm_proxy_CLZERO__, + "cmc" : __asm_proxy_CMC__, + "cmova" : __asm_proxy_CMOVA__, + "cmovae" : __asm_proxy_CMOVAE__, + "cmovb" : __asm_proxy_CMOVB__, + "cmovbe" : __asm_proxy_CMOVBE__, + "cmovc" : __asm_proxy_CMOVC__, + "cmove" : __asm_proxy_CMOVE__, + "cmovg" : __asm_proxy_CMOVG__, + "cmovge" : __asm_proxy_CMOVGE__, + "cmovl" : __asm_proxy_CMOVL__, + "cmovle" : __asm_proxy_CMOVLE__, + "cmovna" : __asm_proxy_CMOVNA__, + "cmovnae" : __asm_proxy_CMOVNAE__, + "cmovnb" : __asm_proxy_CMOVNB__, + "cmovnbe" : __asm_proxy_CMOVNBE__, + "cmovnc" : __asm_proxy_CMOVNC__, + "cmovne" : __asm_proxy_CMOVNE__, + "cmovng" : __asm_proxy_CMOVNG__, + "cmovnge" : __asm_proxy_CMOVNGE__, + "cmovnl" : 
__asm_proxy_CMOVNL__, + "cmovnle" : __asm_proxy_CMOVNLE__, + "cmovno" : __asm_proxy_CMOVNO__, + "cmovnp" : __asm_proxy_CMOVNP__, + "cmovns" : __asm_proxy_CMOVNS__, + "cmovnz" : __asm_proxy_CMOVNZ__, + "cmovo" : __asm_proxy_CMOVO__, + "cmovp" : __asm_proxy_CMOVP__, + "cmovpe" : __asm_proxy_CMOVPE__, + "cmovpo" : __asm_proxy_CMOVPO__, + "cmovs" : __asm_proxy_CMOVS__, + "cmovz" : __asm_proxy_CMOVZ__, + "cmpb" : __asm_proxy_CMPB__, + "cmpl" : __asm_proxy_CMPL__, + "cmppd" : __asm_proxy_CMPPD__, + "cmpps" : __asm_proxy_CMPPS__, + "cmpq" : __asm_proxy_CMPQ__, + "cmpsd" : __asm_proxy_CMPSD__, + "cmpss" : __asm_proxy_CMPSS__, + "cmpw" : __asm_proxy_CMPW__, + "cmpxchg16b" : __asm_proxy_CMPXCHG16B__, + "cmpxchg8b" : __asm_proxy_CMPXCHG8B__, + "cmpxchgb" : __asm_proxy_CMPXCHGB__, + "cmpxchgl" : __asm_proxy_CMPXCHGL__, + "cmpxchgq" : __asm_proxy_CMPXCHGQ__, + "cmpxchgw" : __asm_proxy_CMPXCHGW__, + "comisd" : __asm_proxy_COMISD__, + "comiss" : __asm_proxy_COMISS__, + "cpuid" : __asm_proxy_CPUID__, + "cqto" : __asm_proxy_CQTO__, + "crc32b" : __asm_proxy_CRC32B__, + "crc32l" : __asm_proxy_CRC32L__, + "crc32q" : __asm_proxy_CRC32Q__, + "crc32w" : __asm_proxy_CRC32W__, + "cvtdq2pd" : __asm_proxy_CVTDQ2PD__, + "cvtdq2ps" : __asm_proxy_CVTDQ2PS__, + "cvtpd2dq" : __asm_proxy_CVTPD2DQ__, + "cvtpd2pi" : __asm_proxy_CVTPD2PI__, + "cvtpd2ps" : __asm_proxy_CVTPD2PS__, + "cvtpi2pd" : __asm_proxy_CVTPI2PD__, + "cvtpi2ps" : __asm_proxy_CVTPI2PS__, + "cvtps2dq" : __asm_proxy_CVTPS2DQ__, + "cvtps2pd" : __asm_proxy_CVTPS2PD__, + "cvtps2pi" : __asm_proxy_CVTPS2PI__, + "cvtsd2si" : __asm_proxy_CVTSD2SI__, + "cvtsd2ss" : __asm_proxy_CVTSD2SS__, + "cvtsi2sd" : __asm_proxy_CVTSI2SD__, + "cvtsi2ss" : __asm_proxy_CVTSI2SS__, + "cvtss2sd" : __asm_proxy_CVTSS2SD__, + "cvtss2si" : __asm_proxy_CVTSS2SI__, + "cvttpd2dq" : __asm_proxy_CVTTPD2DQ__, + "cvttpd2pi" : __asm_proxy_CVTTPD2PI__, + "cvttps2dq" : __asm_proxy_CVTTPS2DQ__, + "cvttps2pi" : __asm_proxy_CVTTPS2PI__, + "cvttsd2si" : __asm_proxy_CVTTSD2SI__, + "cvttss2si" : __asm_proxy_CVTTSS2SI__, + "cwtd" : __asm_proxy_CWTD__, + "cwtl" : __asm_proxy_CWTL__, + "decb" : __asm_proxy_DECB__, + "decl" : __asm_proxy_DECL__, + "decq" : __asm_proxy_DECQ__, + "decw" : __asm_proxy_DECW__, + "divb" : __asm_proxy_DIVB__, + "divl" : __asm_proxy_DIVL__, + "divpd" : __asm_proxy_DIVPD__, + "divps" : __asm_proxy_DIVPS__, + "divq" : __asm_proxy_DIVQ__, + "divsd" : __asm_proxy_DIVSD__, + "divss" : __asm_proxy_DIVSS__, + "divw" : __asm_proxy_DIVW__, + "dppd" : __asm_proxy_DPPD__, + "dpps" : __asm_proxy_DPPS__, + "emms" : __asm_proxy_EMMS__, + "extractps" : __asm_proxy_EXTRACTPS__, + "extrq" : __asm_proxy_EXTRQ__, + "femms" : __asm_proxy_FEMMS__, + "haddpd" : __asm_proxy_HADDPD__, + "haddps" : __asm_proxy_HADDPS__, + "hsubpd" : __asm_proxy_HSUBPD__, + "hsubps" : __asm_proxy_HSUBPS__, + "idivb" : __asm_proxy_IDIVB__, + "idivl" : __asm_proxy_IDIVL__, + "idivq" : __asm_proxy_IDIVQ__, + "idivw" : __asm_proxy_IDIVW__, + "imulb" : __asm_proxy_IMULB__, + "imull" : __asm_proxy_IMULL__, + "imulq" : __asm_proxy_IMULQ__, + "imulw" : __asm_proxy_IMULW__, + "incb" : __asm_proxy_INCB__, + "incl" : __asm_proxy_INCL__, + "incq" : __asm_proxy_INCQ__, + "incw" : __asm_proxy_INCW__, + "insertps" : __asm_proxy_INSERTPS__, + "insertq" : __asm_proxy_INSERTQ__, + "int" : __asm_proxy_INT__, + "ja" : __asm_proxy_JA__, + "jae" : __asm_proxy_JAE__, + "jb" : __asm_proxy_JB__, + "jbe" : __asm_proxy_JBE__, + "jc" : __asm_proxy_JC__, + "je" : __asm_proxy_JE__, + "jecxz" : __asm_proxy_JECXZ__, + "jg" : __asm_proxy_JG__, + "jge" : 
__asm_proxy_JGE__, + "jl" : __asm_proxy_JL__, + "jle" : __asm_proxy_JLE__, + "jmp" : __asm_proxy_JMP__, + "jmpq" : __asm_proxy_JMPQ__, + "jna" : __asm_proxy_JNA__, + "jnae" : __asm_proxy_JNAE__, + "jnb" : __asm_proxy_JNB__, + "jnbe" : __asm_proxy_JNBE__, + "jnc" : __asm_proxy_JNC__, + "jne" : __asm_proxy_JNE__, + "jng" : __asm_proxy_JNG__, + "jnge" : __asm_proxy_JNGE__, + "jnl" : __asm_proxy_JNL__, + "jnle" : __asm_proxy_JNLE__, + "jno" : __asm_proxy_JNO__, + "jnp" : __asm_proxy_JNP__, + "jns" : __asm_proxy_JNS__, + "jnz" : __asm_proxy_JNZ__, + "jo" : __asm_proxy_JO__, + "jp" : __asm_proxy_JP__, + "jpe" : __asm_proxy_JPE__, + "jpo" : __asm_proxy_JPO__, + "jrcxz" : __asm_proxy_JRCXZ__, + "js" : __asm_proxy_JS__, + "jz" : __asm_proxy_JZ__, + "kaddb" : __asm_proxy_KADDB__, + "kaddd" : __asm_proxy_KADDD__, + "kaddq" : __asm_proxy_KADDQ__, + "kaddw" : __asm_proxy_KADDW__, + "kandb" : __asm_proxy_KANDB__, + "kandd" : __asm_proxy_KANDD__, + "kandnb" : __asm_proxy_KANDNB__, + "kandnd" : __asm_proxy_KANDND__, + "kandnq" : __asm_proxy_KANDNQ__, + "kandnw" : __asm_proxy_KANDNW__, + "kandq" : __asm_proxy_KANDQ__, + "kandw" : __asm_proxy_KANDW__, + "kmovb" : __asm_proxy_KMOVB__, + "kmovd" : __asm_proxy_KMOVD__, + "kmovq" : __asm_proxy_KMOVQ__, + "kmovw" : __asm_proxy_KMOVW__, + "knotb" : __asm_proxy_KNOTB__, + "knotd" : __asm_proxy_KNOTD__, + "knotq" : __asm_proxy_KNOTQ__, + "knotw" : __asm_proxy_KNOTW__, + "korb" : __asm_proxy_KORB__, + "kord" : __asm_proxy_KORD__, + "korq" : __asm_proxy_KORQ__, + "kortestb" : __asm_proxy_KORTESTB__, + "kortestd" : __asm_proxy_KORTESTD__, + "kortestq" : __asm_proxy_KORTESTQ__, + "kortestw" : __asm_proxy_KORTESTW__, + "korw" : __asm_proxy_KORW__, + "kshiftlb" : __asm_proxy_KSHIFTLB__, + "kshiftld" : __asm_proxy_KSHIFTLD__, + "kshiftlq" : __asm_proxy_KSHIFTLQ__, + "kshiftlw" : __asm_proxy_KSHIFTLW__, + "kshiftrb" : __asm_proxy_KSHIFTRB__, + "kshiftrd" : __asm_proxy_KSHIFTRD__, + "kshiftrq" : __asm_proxy_KSHIFTRQ__, + "kshiftrw" : __asm_proxy_KSHIFTRW__, + "ktestb" : __asm_proxy_KTESTB__, + "ktestd" : __asm_proxy_KTESTD__, + "ktestq" : __asm_proxy_KTESTQ__, + "ktestw" : __asm_proxy_KTESTW__, + "kunpckbw" : __asm_proxy_KUNPCKBW__, + "kunpckdq" : __asm_proxy_KUNPCKDQ__, + "kunpckwd" : __asm_proxy_KUNPCKWD__, + "kxnorb" : __asm_proxy_KXNORB__, + "kxnord" : __asm_proxy_KXNORD__, + "kxnorq" : __asm_proxy_KXNORQ__, + "kxnorw" : __asm_proxy_KXNORW__, + "kxorb" : __asm_proxy_KXORB__, + "kxord" : __asm_proxy_KXORD__, + "kxorq" : __asm_proxy_KXORQ__, + "kxorw" : __asm_proxy_KXORW__, + "lddqu" : __asm_proxy_LDDQU__, + "ldmxcsr" : __asm_proxy_LDMXCSR__, + "leal" : __asm_proxy_LEAL__, + "leaq" : __asm_proxy_LEAQ__, + "leaw" : __asm_proxy_LEAW__, + "lfence" : __asm_proxy_LFENCE__, + "lzcntl" : __asm_proxy_LZCNTL__, + "lzcntq" : __asm_proxy_LZCNTQ__, + "lzcntw" : __asm_proxy_LZCNTW__, + "maskmovdqu" : __asm_proxy_MASKMOVDQU__, + "maskmovq" : __asm_proxy_MASKMOVQ__, + "maxpd" : __asm_proxy_MAXPD__, + "maxps" : __asm_proxy_MAXPS__, + "maxsd" : __asm_proxy_MAXSD__, + "maxss" : __asm_proxy_MAXSS__, + "mfence" : __asm_proxy_MFENCE__, + "minpd" : __asm_proxy_MINPD__, + "minps" : __asm_proxy_MINPS__, + "minsd" : __asm_proxy_MINSD__, + "minss" : __asm_proxy_MINSS__, + "monitor" : __asm_proxy_MONITOR__, + "monitorx" : __asm_proxy_MONITORX__, + "movapd" : __asm_proxy_MOVAPD__, + "movaps" : __asm_proxy_MOVAPS__, + "movb" : __asm_proxy_MOVB__, + "movbel" : __asm_proxy_MOVBEL__, + "movbeq" : __asm_proxy_MOVBEQ__, + "movbew" : __asm_proxy_MOVBEW__, + "movd" : __asm_proxy_MOVD__, + "movddup" : 
__asm_proxy_MOVDDUP__, + "movdq2q" : __asm_proxy_MOVDQ2Q__, + "movdqa" : __asm_proxy_MOVDQA__, + "movdqu" : __asm_proxy_MOVDQU__, + "movhlps" : __asm_proxy_MOVHLPS__, + "movhpd" : __asm_proxy_MOVHPD__, + "movhps" : __asm_proxy_MOVHPS__, + "movl" : __asm_proxy_MOVL__, + "movlhps" : __asm_proxy_MOVLHPS__, + "movlpd" : __asm_proxy_MOVLPD__, + "movlps" : __asm_proxy_MOVLPS__, + "movmskpd" : __asm_proxy_MOVMSKPD__, + "movmskps" : __asm_proxy_MOVMSKPS__, + "movntdq" : __asm_proxy_MOVNTDQ__, + "movntdqa" : __asm_proxy_MOVNTDQA__, + "movntil" : __asm_proxy_MOVNTIL__, + "movntiq" : __asm_proxy_MOVNTIQ__, + "movntpd" : __asm_proxy_MOVNTPD__, + "movntps" : __asm_proxy_MOVNTPS__, + "movntq" : __asm_proxy_MOVNTQ__, + "movntsd" : __asm_proxy_MOVNTSD__, + "movntss" : __asm_proxy_MOVNTSS__, + "movq" : __asm_proxy_MOVQ__, + "movq2dq" : __asm_proxy_MOVQ2DQ__, + "movsbl" : __asm_proxy_MOVSBL__, + "movsbq" : __asm_proxy_MOVSBQ__, + "movsbw" : __asm_proxy_MOVSBW__, + "movsd" : __asm_proxy_MOVSD__, + "movshdup" : __asm_proxy_MOVSHDUP__, + "movsldup" : __asm_proxy_MOVSLDUP__, + "movslq" : __asm_proxy_MOVSLQ__, + "movss" : __asm_proxy_MOVSS__, + "movswl" : __asm_proxy_MOVSWL__, + "movswq" : __asm_proxy_MOVSWQ__, + "movupd" : __asm_proxy_MOVUPD__, + "movups" : __asm_proxy_MOVUPS__, + "movw" : __asm_proxy_MOVW__, + "movzbl" : __asm_proxy_MOVZBL__, + "movzbq" : __asm_proxy_MOVZBQ__, + "movzbw" : __asm_proxy_MOVZBW__, + "movzwl" : __asm_proxy_MOVZWL__, + "movzwq" : __asm_proxy_MOVZWQ__, + "mpsadbw" : __asm_proxy_MPSADBW__, + "mulb" : __asm_proxy_MULB__, + "mull" : __asm_proxy_MULL__, + "mulpd" : __asm_proxy_MULPD__, + "mulps" : __asm_proxy_MULPS__, + "mulq" : __asm_proxy_MULQ__, + "mulsd" : __asm_proxy_MULSD__, + "mulss" : __asm_proxy_MULSS__, + "mulw" : __asm_proxy_MULW__, + "mulxl" : __asm_proxy_MULXL__, + "mulxq" : __asm_proxy_MULXQ__, + "mwait" : __asm_proxy_MWAIT__, + "mwaitx" : __asm_proxy_MWAITX__, + "negb" : __asm_proxy_NEGB__, + "negl" : __asm_proxy_NEGL__, + "negq" : __asm_proxy_NEGQ__, + "negw" : __asm_proxy_NEGW__, + "nop" : __asm_proxy_NOP__, + "notb" : __asm_proxy_NOTB__, + "notl" : __asm_proxy_NOTL__, + "notq" : __asm_proxy_NOTQ__, + "notw" : __asm_proxy_NOTW__, + "orb" : __asm_proxy_ORB__, + "orl" : __asm_proxy_ORL__, + "orpd" : __asm_proxy_ORPD__, + "orps" : __asm_proxy_ORPS__, + "orq" : __asm_proxy_ORQ__, + "orw" : __asm_proxy_ORW__, + "pabsb" : __asm_proxy_PABSB__, + "pabsd" : __asm_proxy_PABSD__, + "pabsw" : __asm_proxy_PABSW__, + "packssdw" : __asm_proxy_PACKSSDW__, + "packsswb" : __asm_proxy_PACKSSWB__, + "packusdw" : __asm_proxy_PACKUSDW__, + "packuswb" : __asm_proxy_PACKUSWB__, + "paddb" : __asm_proxy_PADDB__, + "paddd" : __asm_proxy_PADDD__, + "paddq" : __asm_proxy_PADDQ__, + "paddsb" : __asm_proxy_PADDSB__, + "paddsw" : __asm_proxy_PADDSW__, + "paddusb" : __asm_proxy_PADDUSB__, + "paddusw" : __asm_proxy_PADDUSW__, + "paddw" : __asm_proxy_PADDW__, + "palignr" : __asm_proxy_PALIGNR__, + "pand" : __asm_proxy_PAND__, + "pandn" : __asm_proxy_PANDN__, + "pause" : __asm_proxy_PAUSE__, + "pavgb" : __asm_proxy_PAVGB__, + "pavgusb" : __asm_proxy_PAVGUSB__, + "pavgw" : __asm_proxy_PAVGW__, + "pblendvb" : __asm_proxy_PBLENDVB__, + "pblendw" : __asm_proxy_PBLENDW__, + "pclmulqdq" : __asm_proxy_PCLMULQDQ__, + "pcmpeqb" : __asm_proxy_PCMPEQB__, + "pcmpeqd" : __asm_proxy_PCMPEQD__, + "pcmpeqq" : __asm_proxy_PCMPEQQ__, + "pcmpeqw" : __asm_proxy_PCMPEQW__, + "pcmpestri" : __asm_proxy_PCMPESTRI__, + "pcmpestrm" : __asm_proxy_PCMPESTRM__, + "pcmpgtb" : __asm_proxy_PCMPGTB__, + "pcmpgtd" : __asm_proxy_PCMPGTD__, 
+ "pcmpgtq" : __asm_proxy_PCMPGTQ__, + "pcmpgtw" : __asm_proxy_PCMPGTW__, + "pcmpistri" : __asm_proxy_PCMPISTRI__, + "pcmpistrm" : __asm_proxy_PCMPISTRM__, + "pdep" : __asm_proxy_PDEP__, + "pext" : __asm_proxy_PEXT__, + "pextrb" : __asm_proxy_PEXTRB__, + "pextrd" : __asm_proxy_PEXTRD__, + "pextrq" : __asm_proxy_PEXTRQ__, + "pextrw" : __asm_proxy_PEXTRW__, + "pf2id" : __asm_proxy_PF2ID__, + "pf2iw" : __asm_proxy_PF2IW__, + "pfacc" : __asm_proxy_PFACC__, + "pfadd" : __asm_proxy_PFADD__, + "pfcmpeq" : __asm_proxy_PFCMPEQ__, + "pfcmpge" : __asm_proxy_PFCMPGE__, + "pfcmpgt" : __asm_proxy_PFCMPGT__, + "pfmax" : __asm_proxy_PFMAX__, + "pfmin" : __asm_proxy_PFMIN__, + "pfmul" : __asm_proxy_PFMUL__, + "pfnacc" : __asm_proxy_PFNACC__, + "pfpnacc" : __asm_proxy_PFPNACC__, + "pfrcp" : __asm_proxy_PFRCP__, + "pfrcpit1" : __asm_proxy_PFRCPIT1__, + "pfrcpit2" : __asm_proxy_PFRCPIT2__, + "pfrsqit1" : __asm_proxy_PFRSQIT1__, + "pfrsqrt" : __asm_proxy_PFRSQRT__, + "pfsub" : __asm_proxy_PFSUB__, + "pfsubr" : __asm_proxy_PFSUBR__, + "phaddd" : __asm_proxy_PHADDD__, + "phaddsw" : __asm_proxy_PHADDSW__, + "phaddw" : __asm_proxy_PHADDW__, + "phminposuw" : __asm_proxy_PHMINPOSUW__, + "phsubd" : __asm_proxy_PHSUBD__, + "phsubsw" : __asm_proxy_PHSUBSW__, + "phsubw" : __asm_proxy_PHSUBW__, + "pi2fd" : __asm_proxy_PI2FD__, + "pi2fw" : __asm_proxy_PI2FW__, + "pinsrb" : __asm_proxy_PINSRB__, + "pinsrd" : __asm_proxy_PINSRD__, + "pinsrq" : __asm_proxy_PINSRQ__, + "pinsrw" : __asm_proxy_PINSRW__, + "pmaddubsw" : __asm_proxy_PMADDUBSW__, + "pmaddwd" : __asm_proxy_PMADDWD__, + "pmaxsb" : __asm_proxy_PMAXSB__, + "pmaxsd" : __asm_proxy_PMAXSD__, + "pmaxsw" : __asm_proxy_PMAXSW__, + "pmaxub" : __asm_proxy_PMAXUB__, + "pmaxud" : __asm_proxy_PMAXUD__, + "pmaxuw" : __asm_proxy_PMAXUW__, + "pminsb" : __asm_proxy_PMINSB__, + "pminsd" : __asm_proxy_PMINSD__, + "pminsw" : __asm_proxy_PMINSW__, + "pminub" : __asm_proxy_PMINUB__, + "pminud" : __asm_proxy_PMINUD__, + "pminuw" : __asm_proxy_PMINUW__, + "pmovmskb" : __asm_proxy_PMOVMSKB__, + "pmovsxbd" : __asm_proxy_PMOVSXBD__, + "pmovsxbq" : __asm_proxy_PMOVSXBQ__, + "pmovsxbw" : __asm_proxy_PMOVSXBW__, + "pmovsxdq" : __asm_proxy_PMOVSXDQ__, + "pmovsxwd" : __asm_proxy_PMOVSXWD__, + "pmovsxwq" : __asm_proxy_PMOVSXWQ__, + "pmovzxbd" : __asm_proxy_PMOVZXBD__, + "pmovzxbq" : __asm_proxy_PMOVZXBQ__, + "pmovzxbw" : __asm_proxy_PMOVZXBW__, + "pmovzxdq" : __asm_proxy_PMOVZXDQ__, + "pmovzxwd" : __asm_proxy_PMOVZXWD__, + "pmovzxwq" : __asm_proxy_PMOVZXWQ__, + "pmuldq" : __asm_proxy_PMULDQ__, + "pmulhrsw" : __asm_proxy_PMULHRSW__, + "pmulhrw" : __asm_proxy_PMULHRW__, + "pmulhuw" : __asm_proxy_PMULHUW__, + "pmulhw" : __asm_proxy_PMULHW__, + "pmulld" : __asm_proxy_PMULLD__, + "pmullw" : __asm_proxy_PMULLW__, + "pmuludq" : __asm_proxy_PMULUDQ__, + "popcntl" : __asm_proxy_POPCNTL__, + "popcntq" : __asm_proxy_POPCNTQ__, + "popcntw" : __asm_proxy_POPCNTW__, + "popq" : __asm_proxy_POPQ__, + "popw" : __asm_proxy_POPW__, + "por" : __asm_proxy_POR__, + "prefetch" : __asm_proxy_PREFETCH__, + "prefetchnta" : __asm_proxy_PREFETCHNTA__, + "prefetcht0" : __asm_proxy_PREFETCHT0__, + "prefetcht1" : __asm_proxy_PREFETCHT1__, + "prefetcht2" : __asm_proxy_PREFETCHT2__, + "prefetchw" : __asm_proxy_PREFETCHW__, + "prefetchwt1" : __asm_proxy_PREFETCHWT1__, + "psadbw" : __asm_proxy_PSADBW__, + "pshufb" : __asm_proxy_PSHUFB__, + "pshufd" : __asm_proxy_PSHUFD__, + "pshufhw" : __asm_proxy_PSHUFHW__, + "pshuflw" : __asm_proxy_PSHUFLW__, + "pshufw" : __asm_proxy_PSHUFW__, + "psignb" : __asm_proxy_PSIGNB__, + "psignd" : 
__asm_proxy_PSIGND__, + "psignw" : __asm_proxy_PSIGNW__, + "pslld" : __asm_proxy_PSLLD__, + "pslldq" : __asm_proxy_PSLLDQ__, + "psllq" : __asm_proxy_PSLLQ__, + "psllw" : __asm_proxy_PSLLW__, + "psrad" : __asm_proxy_PSRAD__, + "psraw" : __asm_proxy_PSRAW__, + "psrld" : __asm_proxy_PSRLD__, + "psrldq" : __asm_proxy_PSRLDQ__, + "psrlq" : __asm_proxy_PSRLQ__, + "psrlw" : __asm_proxy_PSRLW__, + "psubb" : __asm_proxy_PSUBB__, + "psubd" : __asm_proxy_PSUBD__, + "psubq" : __asm_proxy_PSUBQ__, + "psubsb" : __asm_proxy_PSUBSB__, + "psubsw" : __asm_proxy_PSUBSW__, + "psubusb" : __asm_proxy_PSUBUSB__, + "psubusw" : __asm_proxy_PSUBUSW__, + "psubw" : __asm_proxy_PSUBW__, + "pswapd" : __asm_proxy_PSWAPD__, + "ptest" : __asm_proxy_PTEST__, + "punpckhbw" : __asm_proxy_PUNPCKHBW__, + "punpckhdq" : __asm_proxy_PUNPCKHDQ__, + "punpckhqdq" : __asm_proxy_PUNPCKHQDQ__, + "punpckhwd" : __asm_proxy_PUNPCKHWD__, + "punpcklbw" : __asm_proxy_PUNPCKLBW__, + "punpckldq" : __asm_proxy_PUNPCKLDQ__, + "punpcklqdq" : __asm_proxy_PUNPCKLQDQ__, + "punpcklwd" : __asm_proxy_PUNPCKLWD__, + "pushq" : __asm_proxy_PUSHQ__, + "pushw" : __asm_proxy_PUSHW__, + "pxor" : __asm_proxy_PXOR__, + "rclb" : __asm_proxy_RCLB__, + "rcll" : __asm_proxy_RCLL__, + "rclq" : __asm_proxy_RCLQ__, + "rclw" : __asm_proxy_RCLW__, + "rcpps" : __asm_proxy_RCPPS__, + "rcpss" : __asm_proxy_RCPSS__, + "rcrb" : __asm_proxy_RCRB__, + "rcrl" : __asm_proxy_RCRL__, + "rcrq" : __asm_proxy_RCRQ__, + "rcrw" : __asm_proxy_RCRW__, + "rdrand" : __asm_proxy_RDRAND__, + "rdseed" : __asm_proxy_RDSEED__, + "rdtsc" : __asm_proxy_RDTSC__, + "rdtscp" : __asm_proxy_RDTSCP__, + "ret" : __asm_proxy_RET__, + "rolb" : __asm_proxy_ROLB__, + "roll" : __asm_proxy_ROLL__, + "rolq" : __asm_proxy_ROLQ__, + "rolw" : __asm_proxy_ROLW__, + "rorb" : __asm_proxy_RORB__, + "rorl" : __asm_proxy_RORL__, + "rorq" : __asm_proxy_RORQ__, + "rorw" : __asm_proxy_RORW__, + "rorxl" : __asm_proxy_RORXL__, + "rorxq" : __asm_proxy_RORXQ__, + "roundpd" : __asm_proxy_ROUNDPD__, + "roundps" : __asm_proxy_ROUNDPS__, + "roundsd" : __asm_proxy_ROUNDSD__, + "roundss" : __asm_proxy_ROUNDSS__, + "rsqrtps" : __asm_proxy_RSQRTPS__, + "rsqrtss" : __asm_proxy_RSQRTSS__, + "salb" : __asm_proxy_SALB__, + "sall" : __asm_proxy_SALL__, + "salq" : __asm_proxy_SALQ__, + "salw" : __asm_proxy_SALW__, + "sarb" : __asm_proxy_SARB__, + "sarl" : __asm_proxy_SARL__, + "sarq" : __asm_proxy_SARQ__, + "sarw" : __asm_proxy_SARW__, + "sarxl" : __asm_proxy_SARXL__, + "sarxq" : __asm_proxy_SARXQ__, + "sbbb" : __asm_proxy_SBBB__, + "sbbl" : __asm_proxy_SBBL__, + "sbbq" : __asm_proxy_SBBQ__, + "sbbw" : __asm_proxy_SBBW__, + "seta" : __asm_proxy_SETA__, + "setae" : __asm_proxy_SETAE__, + "setb" : __asm_proxy_SETB__, + "setbe" : __asm_proxy_SETBE__, + "setc" : __asm_proxy_SETC__, + "sete" : __asm_proxy_SETE__, + "setg" : __asm_proxy_SETG__, + "setge" : __asm_proxy_SETGE__, + "setl" : __asm_proxy_SETL__, + "setle" : __asm_proxy_SETLE__, + "setna" : __asm_proxy_SETNA__, + "setnae" : __asm_proxy_SETNAE__, + "setnb" : __asm_proxy_SETNB__, + "setnbe" : __asm_proxy_SETNBE__, + "setnc" : __asm_proxy_SETNC__, + "setne" : __asm_proxy_SETNE__, + "setng" : __asm_proxy_SETNG__, + "setnge" : __asm_proxy_SETNGE__, + "setnl" : __asm_proxy_SETNL__, + "setnle" : __asm_proxy_SETNLE__, + "setno" : __asm_proxy_SETNO__, + "setnp" : __asm_proxy_SETNP__, + "setns" : __asm_proxy_SETNS__, + "setnz" : __asm_proxy_SETNZ__, + "seto" : __asm_proxy_SETO__, + "setp" : __asm_proxy_SETP__, + "setpe" : __asm_proxy_SETPE__, + "setpo" : __asm_proxy_SETPO__, + "sets" : 
__asm_proxy_SETS__, + "setz" : __asm_proxy_SETZ__, + "sfence" : __asm_proxy_SFENCE__, + "sha1msg1" : __asm_proxy_SHA1MSG1__, + "sha1msg2" : __asm_proxy_SHA1MSG2__, + "sha1nexte" : __asm_proxy_SHA1NEXTE__, + "sha1rnds4" : __asm_proxy_SHA1RNDS4__, + "sha256msg1" : __asm_proxy_SHA256MSG1__, + "sha256msg2" : __asm_proxy_SHA256MSG2__, + "sha256rnds2" : __asm_proxy_SHA256RNDS2__, + "shlb" : __asm_proxy_SHLB__, + "shldl" : __asm_proxy_SHLDL__, + "shldq" : __asm_proxy_SHLDQ__, + "shldw" : __asm_proxy_SHLDW__, + "shll" : __asm_proxy_SHLL__, + "shlq" : __asm_proxy_SHLQ__, + "shlw" : __asm_proxy_SHLW__, + "shlxl" : __asm_proxy_SHLXL__, + "shlxq" : __asm_proxy_SHLXQ__, + "shrb" : __asm_proxy_SHRB__, + "shrdl" : __asm_proxy_SHRDL__, + "shrdq" : __asm_proxy_SHRDQ__, + "shrdw" : __asm_proxy_SHRDW__, + "shrl" : __asm_proxy_SHRL__, + "shrq" : __asm_proxy_SHRQ__, + "shrw" : __asm_proxy_SHRW__, + "shrxl" : __asm_proxy_SHRXL__, + "shrxq" : __asm_proxy_SHRXQ__, + "shufpd" : __asm_proxy_SHUFPD__, + "shufps" : __asm_proxy_SHUFPS__, + "sqrtpd" : __asm_proxy_SQRTPD__, + "sqrtps" : __asm_proxy_SQRTPS__, + "sqrtsd" : __asm_proxy_SQRTSD__, + "sqrtss" : __asm_proxy_SQRTSS__, + "stc" : __asm_proxy_STC__, + "std" : __asm_proxy_STD__, + "stmxcsr" : __asm_proxy_STMXCSR__, + "subb" : __asm_proxy_SUBB__, + "subl" : __asm_proxy_SUBL__, + "subpd" : __asm_proxy_SUBPD__, + "subps" : __asm_proxy_SUBPS__, + "subq" : __asm_proxy_SUBQ__, + "subsd" : __asm_proxy_SUBSD__, + "subss" : __asm_proxy_SUBSS__, + "subw" : __asm_proxy_SUBW__, + "syscall" : __asm_proxy_SYSCALL__, + "t1mskc" : __asm_proxy_T1MSKC__, + "testb" : __asm_proxy_TESTB__, + "testl" : __asm_proxy_TESTL__, + "testq" : __asm_proxy_TESTQ__, + "testw" : __asm_proxy_TESTW__, + "tzcntl" : __asm_proxy_TZCNTL__, + "tzcntq" : __asm_proxy_TZCNTQ__, + "tzcntw" : __asm_proxy_TZCNTW__, + "tzmsk" : __asm_proxy_TZMSK__, + "ucomisd" : __asm_proxy_UCOMISD__, + "ucomiss" : __asm_proxy_UCOMISS__, + "ud2" : __asm_proxy_UD2__, + "unpckhpd" : __asm_proxy_UNPCKHPD__, + "unpckhps" : __asm_proxy_UNPCKHPS__, + "unpcklpd" : __asm_proxy_UNPCKLPD__, + "unpcklps" : __asm_proxy_UNPCKLPS__, + "vaddpd" : __asm_proxy_VADDPD__, + "vaddps" : __asm_proxy_VADDPS__, + "vaddsd" : __asm_proxy_VADDSD__, + "vaddss" : __asm_proxy_VADDSS__, + "vaddsubpd" : __asm_proxy_VADDSUBPD__, + "vaddsubps" : __asm_proxy_VADDSUBPS__, + "vaesdec" : __asm_proxy_VAESDEC__, + "vaesdeclast" : __asm_proxy_VAESDECLAST__, + "vaesenc" : __asm_proxy_VAESENC__, + "vaesenclast" : __asm_proxy_VAESENCLAST__, + "vaesimc" : __asm_proxy_VAESIMC__, + "vaeskeygenassist" : __asm_proxy_VAESKEYGENASSIST__, + "valignd" : __asm_proxy_VALIGND__, + "valignq" : __asm_proxy_VALIGNQ__, + "vandnpd" : __asm_proxy_VANDNPD__, + "vandnps" : __asm_proxy_VANDNPS__, + "vandpd" : __asm_proxy_VANDPD__, + "vandps" : __asm_proxy_VANDPS__, + "vblendmpd" : __asm_proxy_VBLENDMPD__, + "vblendmps" : __asm_proxy_VBLENDMPS__, + "vblendpd" : __asm_proxy_VBLENDPD__, + "vblendps" : __asm_proxy_VBLENDPS__, + "vblendvpd" : __asm_proxy_VBLENDVPD__, + "vblendvps" : __asm_proxy_VBLENDVPS__, + "vbroadcastf128" : __asm_proxy_VBROADCASTF128__, + "vbroadcastf32x2" : __asm_proxy_VBROADCASTF32X2__, + "vbroadcastf32x4" : __asm_proxy_VBROADCASTF32X4__, + "vbroadcastf32x8" : __asm_proxy_VBROADCASTF32X8__, + "vbroadcastf64x2" : __asm_proxy_VBROADCASTF64X2__, + "vbroadcastf64x4" : __asm_proxy_VBROADCASTF64X4__, + "vbroadcasti128" : __asm_proxy_VBROADCASTI128__, + "vbroadcasti32x2" : __asm_proxy_VBROADCASTI32X2__, + "vbroadcasti32x4" : __asm_proxy_VBROADCASTI32X4__, + "vbroadcasti32x8" : 
__asm_proxy_VBROADCASTI32X8__, + "vbroadcasti64x2" : __asm_proxy_VBROADCASTI64X2__, + "vbroadcasti64x4" : __asm_proxy_VBROADCASTI64X4__, + "vbroadcastsd" : __asm_proxy_VBROADCASTSD__, + "vbroadcastss" : __asm_proxy_VBROADCASTSS__, + "vcmppd" : __asm_proxy_VCMPPD__, + "vcmpps" : __asm_proxy_VCMPPS__, + "vcmpsd" : __asm_proxy_VCMPSD__, + "vcmpss" : __asm_proxy_VCMPSS__, + "vcomisd" : __asm_proxy_VCOMISD__, + "vcomiss" : __asm_proxy_VCOMISS__, + "vcompresspd" : __asm_proxy_VCOMPRESSPD__, + "vcompressps" : __asm_proxy_VCOMPRESSPS__, + "vcvtdq2pd" : __asm_proxy_VCVTDQ2PD__, + "vcvtdq2ps" : __asm_proxy_VCVTDQ2PS__, + "vcvtpd2dq" : __asm_proxy_VCVTPD2DQ__, + "vcvtpd2ps" : __asm_proxy_VCVTPD2PS__, + "vcvtpd2qq" : __asm_proxy_VCVTPD2QQ__, + "vcvtpd2udq" : __asm_proxy_VCVTPD2UDQ__, + "vcvtpd2uqq" : __asm_proxy_VCVTPD2UQQ__, + "vcvtph2ps" : __asm_proxy_VCVTPH2PS__, + "vcvtps2dq" : __asm_proxy_VCVTPS2DQ__, + "vcvtps2pd" : __asm_proxy_VCVTPS2PD__, + "vcvtps2ph" : __asm_proxy_VCVTPS2PH__, + "vcvtps2qq" : __asm_proxy_VCVTPS2QQ__, + "vcvtps2udq" : __asm_proxy_VCVTPS2UDQ__, + "vcvtps2uqq" : __asm_proxy_VCVTPS2UQQ__, + "vcvtqq2pd" : __asm_proxy_VCVTQQ2PD__, + "vcvtqq2ps" : __asm_proxy_VCVTQQ2PS__, + "vcvtsd2si" : __asm_proxy_VCVTSD2SI__, + "vcvtsd2ss" : __asm_proxy_VCVTSD2SS__, + "vcvtsd2usi" : __asm_proxy_VCVTSD2USI__, + "vcvtsi2sd" : __asm_proxy_VCVTSI2SD__, + "vcvtsi2ss" : __asm_proxy_VCVTSI2SS__, + "vcvtss2sd" : __asm_proxy_VCVTSS2SD__, + "vcvtss2si" : __asm_proxy_VCVTSS2SI__, + "vcvtss2usi" : __asm_proxy_VCVTSS2USI__, + "vcvttpd2dq" : __asm_proxy_VCVTTPD2DQ__, + "vcvttpd2qq" : __asm_proxy_VCVTTPD2QQ__, + "vcvttpd2udq" : __asm_proxy_VCVTTPD2UDQ__, + "vcvttpd2uqq" : __asm_proxy_VCVTTPD2UQQ__, + "vcvttps2dq" : __asm_proxy_VCVTTPS2DQ__, + "vcvttps2qq" : __asm_proxy_VCVTTPS2QQ__, + "vcvttps2udq" : __asm_proxy_VCVTTPS2UDQ__, + "vcvttps2uqq" : __asm_proxy_VCVTTPS2UQQ__, + "vcvttsd2si" : __asm_proxy_VCVTTSD2SI__, + "vcvttsd2usi" : __asm_proxy_VCVTTSD2USI__, + "vcvttss2si" : __asm_proxy_VCVTTSS2SI__, + "vcvttss2usi" : __asm_proxy_VCVTTSS2USI__, + "vcvtudq2pd" : __asm_proxy_VCVTUDQ2PD__, + "vcvtudq2ps" : __asm_proxy_VCVTUDQ2PS__, + "vcvtuqq2pd" : __asm_proxy_VCVTUQQ2PD__, + "vcvtuqq2ps" : __asm_proxy_VCVTUQQ2PS__, + "vcvtusi2sd" : __asm_proxy_VCVTUSI2SD__, + "vcvtusi2ss" : __asm_proxy_VCVTUSI2SS__, + "vdbpsadbw" : __asm_proxy_VDBPSADBW__, + "vdivpd" : __asm_proxy_VDIVPD__, + "vdivps" : __asm_proxy_VDIVPS__, + "vdivsd" : __asm_proxy_VDIVSD__, + "vdivss" : __asm_proxy_VDIVSS__, + "vdppd" : __asm_proxy_VDPPD__, + "vdpps" : __asm_proxy_VDPPS__, + "vexp2pd" : __asm_proxy_VEXP2PD__, + "vexp2ps" : __asm_proxy_VEXP2PS__, + "vexpandpd" : __asm_proxy_VEXPANDPD__, + "vexpandps" : __asm_proxy_VEXPANDPS__, + "vextractf128" : __asm_proxy_VEXTRACTF128__, + "vextractf32x4" : __asm_proxy_VEXTRACTF32X4__, + "vextractf32x8" : __asm_proxy_VEXTRACTF32X8__, + "vextractf64x2" : __asm_proxy_VEXTRACTF64X2__, + "vextractf64x4" : __asm_proxy_VEXTRACTF64X4__, + "vextracti128" : __asm_proxy_VEXTRACTI128__, + "vextracti32x4" : __asm_proxy_VEXTRACTI32X4__, + "vextracti32x8" : __asm_proxy_VEXTRACTI32X8__, + "vextracti64x2" : __asm_proxy_VEXTRACTI64X2__, + "vextracti64x4" : __asm_proxy_VEXTRACTI64X4__, + "vextractps" : __asm_proxy_VEXTRACTPS__, + "vfixupimmpd" : __asm_proxy_VFIXUPIMMPD__, + "vfixupimmps" : __asm_proxy_VFIXUPIMMPS__, + "vfixupimmsd" : __asm_proxy_VFIXUPIMMSD__, + "vfixupimmss" : __asm_proxy_VFIXUPIMMSS__, + "vfmadd132pd" : __asm_proxy_VFMADD132PD__, + "vfmadd132ps" : __asm_proxy_VFMADD132PS__, + "vfmadd132sd" : 
__asm_proxy_VFMADD132SD__, + "vfmadd132ss" : __asm_proxy_VFMADD132SS__, + "vfmadd213pd" : __asm_proxy_VFMADD213PD__, + "vfmadd213ps" : __asm_proxy_VFMADD213PS__, + "vfmadd213sd" : __asm_proxy_VFMADD213SD__, + "vfmadd213ss" : __asm_proxy_VFMADD213SS__, + "vfmadd231pd" : __asm_proxy_VFMADD231PD__, + "vfmadd231ps" : __asm_proxy_VFMADD231PS__, + "vfmadd231sd" : __asm_proxy_VFMADD231SD__, + "vfmadd231ss" : __asm_proxy_VFMADD231SS__, + "vfmaddpd" : __asm_proxy_VFMADDPD__, + "vfmaddps" : __asm_proxy_VFMADDPS__, + "vfmaddsd" : __asm_proxy_VFMADDSD__, + "vfmaddss" : __asm_proxy_VFMADDSS__, + "vfmaddsub132pd" : __asm_proxy_VFMADDSUB132PD__, + "vfmaddsub132ps" : __asm_proxy_VFMADDSUB132PS__, + "vfmaddsub213pd" : __asm_proxy_VFMADDSUB213PD__, + "vfmaddsub213ps" : __asm_proxy_VFMADDSUB213PS__, + "vfmaddsub231pd" : __asm_proxy_VFMADDSUB231PD__, + "vfmaddsub231ps" : __asm_proxy_VFMADDSUB231PS__, + "vfmaddsubpd" : __asm_proxy_VFMADDSUBPD__, + "vfmaddsubps" : __asm_proxy_VFMADDSUBPS__, + "vfmsub132pd" : __asm_proxy_VFMSUB132PD__, + "vfmsub132ps" : __asm_proxy_VFMSUB132PS__, + "vfmsub132sd" : __asm_proxy_VFMSUB132SD__, + "vfmsub132ss" : __asm_proxy_VFMSUB132SS__, + "vfmsub213pd" : __asm_proxy_VFMSUB213PD__, + "vfmsub213ps" : __asm_proxy_VFMSUB213PS__, + "vfmsub213sd" : __asm_proxy_VFMSUB213SD__, + "vfmsub213ss" : __asm_proxy_VFMSUB213SS__, + "vfmsub231pd" : __asm_proxy_VFMSUB231PD__, + "vfmsub231ps" : __asm_proxy_VFMSUB231PS__, + "vfmsub231sd" : __asm_proxy_VFMSUB231SD__, + "vfmsub231ss" : __asm_proxy_VFMSUB231SS__, + "vfmsubadd132pd" : __asm_proxy_VFMSUBADD132PD__, + "vfmsubadd132ps" : __asm_proxy_VFMSUBADD132PS__, + "vfmsubadd213pd" : __asm_proxy_VFMSUBADD213PD__, + "vfmsubadd213ps" : __asm_proxy_VFMSUBADD213PS__, + "vfmsubadd231pd" : __asm_proxy_VFMSUBADD231PD__, + "vfmsubadd231ps" : __asm_proxy_VFMSUBADD231PS__, + "vfmsubaddpd" : __asm_proxy_VFMSUBADDPD__, + "vfmsubaddps" : __asm_proxy_VFMSUBADDPS__, + "vfmsubpd" : __asm_proxy_VFMSUBPD__, + "vfmsubps" : __asm_proxy_VFMSUBPS__, + "vfmsubsd" : __asm_proxy_VFMSUBSD__, + "vfmsubss" : __asm_proxy_VFMSUBSS__, + "vfnmadd132pd" : __asm_proxy_VFNMADD132PD__, + "vfnmadd132ps" : __asm_proxy_VFNMADD132PS__, + "vfnmadd132sd" : __asm_proxy_VFNMADD132SD__, + "vfnmadd132ss" : __asm_proxy_VFNMADD132SS__, + "vfnmadd213pd" : __asm_proxy_VFNMADD213PD__, + "vfnmadd213ps" : __asm_proxy_VFNMADD213PS__, + "vfnmadd213sd" : __asm_proxy_VFNMADD213SD__, + "vfnmadd213ss" : __asm_proxy_VFNMADD213SS__, + "vfnmadd231pd" : __asm_proxy_VFNMADD231PD__, + "vfnmadd231ps" : __asm_proxy_VFNMADD231PS__, + "vfnmadd231sd" : __asm_proxy_VFNMADD231SD__, + "vfnmadd231ss" : __asm_proxy_VFNMADD231SS__, + "vfnmaddpd" : __asm_proxy_VFNMADDPD__, + "vfnmaddps" : __asm_proxy_VFNMADDPS__, + "vfnmaddsd" : __asm_proxy_VFNMADDSD__, + "vfnmaddss" : __asm_proxy_VFNMADDSS__, + "vfnmsub132pd" : __asm_proxy_VFNMSUB132PD__, + "vfnmsub132ps" : __asm_proxy_VFNMSUB132PS__, + "vfnmsub132sd" : __asm_proxy_VFNMSUB132SD__, + "vfnmsub132ss" : __asm_proxy_VFNMSUB132SS__, + "vfnmsub213pd" : __asm_proxy_VFNMSUB213PD__, + "vfnmsub213ps" : __asm_proxy_VFNMSUB213PS__, + "vfnmsub213sd" : __asm_proxy_VFNMSUB213SD__, + "vfnmsub213ss" : __asm_proxy_VFNMSUB213SS__, + "vfnmsub231pd" : __asm_proxy_VFNMSUB231PD__, + "vfnmsub231ps" : __asm_proxy_VFNMSUB231PS__, + "vfnmsub231sd" : __asm_proxy_VFNMSUB231SD__, + "vfnmsub231ss" : __asm_proxy_VFNMSUB231SS__, + "vfnmsubpd" : __asm_proxy_VFNMSUBPD__, + "vfnmsubps" : __asm_proxy_VFNMSUBPS__, + "vfnmsubsd" : __asm_proxy_VFNMSUBSD__, + "vfnmsubss" : __asm_proxy_VFNMSUBSS__, + "vfpclasspd" : 
__asm_proxy_VFPCLASSPD__, + "vfpclassps" : __asm_proxy_VFPCLASSPS__, + "vfpclasssd" : __asm_proxy_VFPCLASSSD__, + "vfpclassss" : __asm_proxy_VFPCLASSSS__, + "vfrczpd" : __asm_proxy_VFRCZPD__, + "vfrczps" : __asm_proxy_VFRCZPS__, + "vfrczsd" : __asm_proxy_VFRCZSD__, + "vfrczss" : __asm_proxy_VFRCZSS__, + "vgatherdpd" : __asm_proxy_VGATHERDPD__, + "vgatherdps" : __asm_proxy_VGATHERDPS__, + "vgatherpf0dpd" : __asm_proxy_VGATHERPF0DPD__, + "vgatherpf0dps" : __asm_proxy_VGATHERPF0DPS__, + "vgatherpf0qpd" : __asm_proxy_VGATHERPF0QPD__, + "vgatherpf0qps" : __asm_proxy_VGATHERPF0QPS__, + "vgatherpf1dpd" : __asm_proxy_VGATHERPF1DPD__, + "vgatherpf1dps" : __asm_proxy_VGATHERPF1DPS__, + "vgatherpf1qpd" : __asm_proxy_VGATHERPF1QPD__, + "vgatherpf1qps" : __asm_proxy_VGATHERPF1QPS__, + "vgatherqpd" : __asm_proxy_VGATHERQPD__, + "vgatherqps" : __asm_proxy_VGATHERQPS__, + "vgetexppd" : __asm_proxy_VGETEXPPD__, + "vgetexpps" : __asm_proxy_VGETEXPPS__, + "vgetexpsd" : __asm_proxy_VGETEXPSD__, + "vgetexpss" : __asm_proxy_VGETEXPSS__, + "vgetmantpd" : __asm_proxy_VGETMANTPD__, + "vgetmantps" : __asm_proxy_VGETMANTPS__, + "vgetmantsd" : __asm_proxy_VGETMANTSD__, + "vgetmantss" : __asm_proxy_VGETMANTSS__, + "vhaddpd" : __asm_proxy_VHADDPD__, + "vhaddps" : __asm_proxy_VHADDPS__, + "vhsubpd" : __asm_proxy_VHSUBPD__, + "vhsubps" : __asm_proxy_VHSUBPS__, + "vinsertf128" : __asm_proxy_VINSERTF128__, + "vinsertf32x4" : __asm_proxy_VINSERTF32X4__, + "vinsertf32x8" : __asm_proxy_VINSERTF32X8__, + "vinsertf64x2" : __asm_proxy_VINSERTF64X2__, + "vinsertf64x4" : __asm_proxy_VINSERTF64X4__, + "vinserti128" : __asm_proxy_VINSERTI128__, + "vinserti32x4" : __asm_proxy_VINSERTI32X4__, + "vinserti32x8" : __asm_proxy_VINSERTI32X8__, + "vinserti64x2" : __asm_proxy_VINSERTI64X2__, + "vinserti64x4" : __asm_proxy_VINSERTI64X4__, + "vinsertps" : __asm_proxy_VINSERTPS__, + "vlddqu" : __asm_proxy_VLDDQU__, + "vldmxcsr" : __asm_proxy_VLDMXCSR__, + "vmaskmovdqu" : __asm_proxy_VMASKMOVDQU__, + "vmaskmovpd" : __asm_proxy_VMASKMOVPD__, + "vmaskmovps" : __asm_proxy_VMASKMOVPS__, + "vmaxpd" : __asm_proxy_VMAXPD__, + "vmaxps" : __asm_proxy_VMAXPS__, + "vmaxsd" : __asm_proxy_VMAXSD__, + "vmaxss" : __asm_proxy_VMAXSS__, + "vminpd" : __asm_proxy_VMINPD__, + "vminps" : __asm_proxy_VMINPS__, + "vminsd" : __asm_proxy_VMINSD__, + "vminss" : __asm_proxy_VMINSS__, + "vmovapd" : __asm_proxy_VMOVAPD__, + "vmovaps" : __asm_proxy_VMOVAPS__, + "vmovd" : __asm_proxy_VMOVD__, + "vmovddup" : __asm_proxy_VMOVDDUP__, + "vmovdqa" : __asm_proxy_VMOVDQA__, + "vmovdqa32" : __asm_proxy_VMOVDQA32__, + "vmovdqa64" : __asm_proxy_VMOVDQA64__, + "vmovdqu" : __asm_proxy_VMOVDQU__, + "vmovdqu16" : __asm_proxy_VMOVDQU16__, + "vmovdqu32" : __asm_proxy_VMOVDQU32__, + "vmovdqu64" : __asm_proxy_VMOVDQU64__, + "vmovdqu8" : __asm_proxy_VMOVDQU8__, + "vmovhlps" : __asm_proxy_VMOVHLPS__, + "vmovhpd" : __asm_proxy_VMOVHPD__, + "vmovhps" : __asm_proxy_VMOVHPS__, + "vmovlhps" : __asm_proxy_VMOVLHPS__, + "vmovlpd" : __asm_proxy_VMOVLPD__, + "vmovlps" : __asm_proxy_VMOVLPS__, + "vmovmskpd" : __asm_proxy_VMOVMSKPD__, + "vmovmskps" : __asm_proxy_VMOVMSKPS__, + "vmovntdq" : __asm_proxy_VMOVNTDQ__, + "vmovntdqa" : __asm_proxy_VMOVNTDQA__, + "vmovntpd" : __asm_proxy_VMOVNTPD__, + "vmovntps" : __asm_proxy_VMOVNTPS__, + "vmovq" : __asm_proxy_VMOVQ__, + "vmovsd" : __asm_proxy_VMOVSD__, + "vmovshdup" : __asm_proxy_VMOVSHDUP__, + "vmovsldup" : __asm_proxy_VMOVSLDUP__, + "vmovss" : __asm_proxy_VMOVSS__, + "vmovupd" : __asm_proxy_VMOVUPD__, + "vmovups" : __asm_proxy_VMOVUPS__, + "vmpsadbw" : 
__asm_proxy_VMPSADBW__, + "vmulpd" : __asm_proxy_VMULPD__, + "vmulps" : __asm_proxy_VMULPS__, + "vmulsd" : __asm_proxy_VMULSD__, + "vmulss" : __asm_proxy_VMULSS__, + "vorpd" : __asm_proxy_VORPD__, + "vorps" : __asm_proxy_VORPS__, + "vpabsb" : __asm_proxy_VPABSB__, + "vpabsd" : __asm_proxy_VPABSD__, + "vpabsq" : __asm_proxy_VPABSQ__, + "vpabsw" : __asm_proxy_VPABSW__, + "vpackssdw" : __asm_proxy_VPACKSSDW__, + "vpacksswb" : __asm_proxy_VPACKSSWB__, + "vpackusdw" : __asm_proxy_VPACKUSDW__, + "vpackuswb" : __asm_proxy_VPACKUSWB__, + "vpaddb" : __asm_proxy_VPADDB__, + "vpaddd" : __asm_proxy_VPADDD__, + "vpaddq" : __asm_proxy_VPADDQ__, + "vpaddsb" : __asm_proxy_VPADDSB__, + "vpaddsw" : __asm_proxy_VPADDSW__, + "vpaddusb" : __asm_proxy_VPADDUSB__, + "vpaddusw" : __asm_proxy_VPADDUSW__, + "vpaddw" : __asm_proxy_VPADDW__, + "vpalignr" : __asm_proxy_VPALIGNR__, + "vpand" : __asm_proxy_VPAND__, + "vpandd" : __asm_proxy_VPANDD__, + "vpandn" : __asm_proxy_VPANDN__, + "vpandnd" : __asm_proxy_VPANDND__, + "vpandnq" : __asm_proxy_VPANDNQ__, + "vpandq" : __asm_proxy_VPANDQ__, + "vpavgb" : __asm_proxy_VPAVGB__, + "vpavgw" : __asm_proxy_VPAVGW__, + "vpblendd" : __asm_proxy_VPBLENDD__, + "vpblendmb" : __asm_proxy_VPBLENDMB__, + "vpblendmd" : __asm_proxy_VPBLENDMD__, + "vpblendmq" : __asm_proxy_VPBLENDMQ__, + "vpblendmw" : __asm_proxy_VPBLENDMW__, + "vpblendvb" : __asm_proxy_VPBLENDVB__, + "vpblendw" : __asm_proxy_VPBLENDW__, + "vpbroadcastb" : __asm_proxy_VPBROADCASTB__, + "vpbroadcastd" : __asm_proxy_VPBROADCASTD__, + "vpbroadcastmb2q" : __asm_proxy_VPBROADCASTMB2Q__, + "vpbroadcastmw2d" : __asm_proxy_VPBROADCASTMW2D__, + "vpbroadcastq" : __asm_proxy_VPBROADCASTQ__, + "vpbroadcastw" : __asm_proxy_VPBROADCASTW__, + "vpclmulqdq" : __asm_proxy_VPCLMULQDQ__, + "vpcmov" : __asm_proxy_VPCMOV__, + "vpcmpb" : __asm_proxy_VPCMPB__, + "vpcmpd" : __asm_proxy_VPCMPD__, + "vpcmpeqb" : __asm_proxy_VPCMPEQB__, + "vpcmpeqd" : __asm_proxy_VPCMPEQD__, + "vpcmpeqq" : __asm_proxy_VPCMPEQQ__, + "vpcmpeqw" : __asm_proxy_VPCMPEQW__, + "vpcmpestri" : __asm_proxy_VPCMPESTRI__, + "vpcmpestrm" : __asm_proxy_VPCMPESTRM__, + "vpcmpgtb" : __asm_proxy_VPCMPGTB__, + "vpcmpgtd" : __asm_proxy_VPCMPGTD__, + "vpcmpgtq" : __asm_proxy_VPCMPGTQ__, + "vpcmpgtw" : __asm_proxy_VPCMPGTW__, + "vpcmpistri" : __asm_proxy_VPCMPISTRI__, + "vpcmpistrm" : __asm_proxy_VPCMPISTRM__, + "vpcmpq" : __asm_proxy_VPCMPQ__, + "vpcmpub" : __asm_proxy_VPCMPUB__, + "vpcmpud" : __asm_proxy_VPCMPUD__, + "vpcmpuq" : __asm_proxy_VPCMPUQ__, + "vpcmpuw" : __asm_proxy_VPCMPUW__, + "vpcmpw" : __asm_proxy_VPCMPW__, + "vpcomb" : __asm_proxy_VPCOMB__, + "vpcomd" : __asm_proxy_VPCOMD__, + "vpcompressd" : __asm_proxy_VPCOMPRESSD__, + "vpcompressq" : __asm_proxy_VPCOMPRESSQ__, + "vpcomq" : __asm_proxy_VPCOMQ__, + "vpcomub" : __asm_proxy_VPCOMUB__, + "vpcomud" : __asm_proxy_VPCOMUD__, + "vpcomuq" : __asm_proxy_VPCOMUQ__, + "vpcomuw" : __asm_proxy_VPCOMUW__, + "vpcomw" : __asm_proxy_VPCOMW__, + "vpconflictd" : __asm_proxy_VPCONFLICTD__, + "vpconflictq" : __asm_proxy_VPCONFLICTQ__, + "vperm2f128" : __asm_proxy_VPERM2F128__, + "vperm2i128" : __asm_proxy_VPERM2I128__, + "vpermb" : __asm_proxy_VPERMB__, + "vpermd" : __asm_proxy_VPERMD__, + "vpermi2b" : __asm_proxy_VPERMI2B__, + "vpermi2d" : __asm_proxy_VPERMI2D__, + "vpermi2pd" : __asm_proxy_VPERMI2PD__, + "vpermi2ps" : __asm_proxy_VPERMI2PS__, + "vpermi2q" : __asm_proxy_VPERMI2Q__, + "vpermi2w" : __asm_proxy_VPERMI2W__, + "vpermil2pd" : __asm_proxy_VPERMIL2PD__, + "vpermil2ps" : __asm_proxy_VPERMIL2PS__, + "vpermilpd" : 
__asm_proxy_VPERMILPD__, + "vpermilps" : __asm_proxy_VPERMILPS__, + "vpermpd" : __asm_proxy_VPERMPD__, + "vpermps" : __asm_proxy_VPERMPS__, + "vpermq" : __asm_proxy_VPERMQ__, + "vpermt2b" : __asm_proxy_VPERMT2B__, + "vpermt2d" : __asm_proxy_VPERMT2D__, + "vpermt2pd" : __asm_proxy_VPERMT2PD__, + "vpermt2ps" : __asm_proxy_VPERMT2PS__, + "vpermt2q" : __asm_proxy_VPERMT2Q__, + "vpermt2w" : __asm_proxy_VPERMT2W__, + "vpermw" : __asm_proxy_VPERMW__, + "vpexpandd" : __asm_proxy_VPEXPANDD__, + "vpexpandq" : __asm_proxy_VPEXPANDQ__, + "vpextrb" : __asm_proxy_VPEXTRB__, + "vpextrd" : __asm_proxy_VPEXTRD__, + "vpextrq" : __asm_proxy_VPEXTRQ__, + "vpextrw" : __asm_proxy_VPEXTRW__, + "vpgatherdd" : __asm_proxy_VPGATHERDD__, + "vpgatherdq" : __asm_proxy_VPGATHERDQ__, + "vpgatherqd" : __asm_proxy_VPGATHERQD__, + "vpgatherqq" : __asm_proxy_VPGATHERQQ__, + "vphaddbd" : __asm_proxy_VPHADDBD__, + "vphaddbq" : __asm_proxy_VPHADDBQ__, + "vphaddbw" : __asm_proxy_VPHADDBW__, + "vphaddd" : __asm_proxy_VPHADDD__, + "vphadddq" : __asm_proxy_VPHADDDQ__, + "vphaddsw" : __asm_proxy_VPHADDSW__, + "vphaddubd" : __asm_proxy_VPHADDUBD__, + "vphaddubq" : __asm_proxy_VPHADDUBQ__, + "vphaddubw" : __asm_proxy_VPHADDUBW__, + "vphaddudq" : __asm_proxy_VPHADDUDQ__, + "vphadduwd" : __asm_proxy_VPHADDUWD__, + "vphadduwq" : __asm_proxy_VPHADDUWQ__, + "vphaddw" : __asm_proxy_VPHADDW__, + "vphaddwd" : __asm_proxy_VPHADDWD__, + "vphaddwq" : __asm_proxy_VPHADDWQ__, + "vphminposuw" : __asm_proxy_VPHMINPOSUW__, + "vphsubbw" : __asm_proxy_VPHSUBBW__, + "vphsubd" : __asm_proxy_VPHSUBD__, + "vphsubdq" : __asm_proxy_VPHSUBDQ__, + "vphsubsw" : __asm_proxy_VPHSUBSW__, + "vphsubw" : __asm_proxy_VPHSUBW__, + "vphsubwd" : __asm_proxy_VPHSUBWD__, + "vpinsrb" : __asm_proxy_VPINSRB__, + "vpinsrd" : __asm_proxy_VPINSRD__, + "vpinsrq" : __asm_proxy_VPINSRQ__, + "vpinsrw" : __asm_proxy_VPINSRW__, + "vplzcntd" : __asm_proxy_VPLZCNTD__, + "vplzcntq" : __asm_proxy_VPLZCNTQ__, + "vpmacsdd" : __asm_proxy_VPMACSDD__, + "vpmacsdqh" : __asm_proxy_VPMACSDQH__, + "vpmacsdql" : __asm_proxy_VPMACSDQL__, + "vpmacssdd" : __asm_proxy_VPMACSSDD__, + "vpmacssdqh" : __asm_proxy_VPMACSSDQH__, + "vpmacssdql" : __asm_proxy_VPMACSSDQL__, + "vpmacsswd" : __asm_proxy_VPMACSSWD__, + "vpmacssww" : __asm_proxy_VPMACSSWW__, + "vpmacswd" : __asm_proxy_VPMACSWD__, + "vpmacsww" : __asm_proxy_VPMACSWW__, + "vpmadcsswd" : __asm_proxy_VPMADCSSWD__, + "vpmadcswd" : __asm_proxy_VPMADCSWD__, + "vpmadd52huq" : __asm_proxy_VPMADD52HUQ__, + "vpmadd52luq" : __asm_proxy_VPMADD52LUQ__, + "vpmaddubsw" : __asm_proxy_VPMADDUBSW__, + "vpmaddwd" : __asm_proxy_VPMADDWD__, + "vpmaskmovd" : __asm_proxy_VPMASKMOVD__, + "vpmaskmovq" : __asm_proxy_VPMASKMOVQ__, + "vpmaxsb" : __asm_proxy_VPMAXSB__, + "vpmaxsd" : __asm_proxy_VPMAXSD__, + "vpmaxsq" : __asm_proxy_VPMAXSQ__, + "vpmaxsw" : __asm_proxy_VPMAXSW__, + "vpmaxub" : __asm_proxy_VPMAXUB__, + "vpmaxud" : __asm_proxy_VPMAXUD__, + "vpmaxuq" : __asm_proxy_VPMAXUQ__, + "vpmaxuw" : __asm_proxy_VPMAXUW__, + "vpminsb" : __asm_proxy_VPMINSB__, + "vpminsd" : __asm_proxy_VPMINSD__, + "vpminsq" : __asm_proxy_VPMINSQ__, + "vpminsw" : __asm_proxy_VPMINSW__, + "vpminub" : __asm_proxy_VPMINUB__, + "vpminud" : __asm_proxy_VPMINUD__, + "vpminuq" : __asm_proxy_VPMINUQ__, + "vpminuw" : __asm_proxy_VPMINUW__, + "vpmovb2m" : __asm_proxy_VPMOVB2M__, + "vpmovd2m" : __asm_proxy_VPMOVD2M__, + "vpmovdb" : __asm_proxy_VPMOVDB__, + "vpmovdw" : __asm_proxy_VPMOVDW__, + "vpmovm2b" : __asm_proxy_VPMOVM2B__, + "vpmovm2d" : __asm_proxy_VPMOVM2D__, + "vpmovm2q" : __asm_proxy_VPMOVM2Q__, 
+ "vpmovm2w" : __asm_proxy_VPMOVM2W__, + "vpmovmskb" : __asm_proxy_VPMOVMSKB__, + "vpmovq2m" : __asm_proxy_VPMOVQ2M__, + "vpmovqb" : __asm_proxy_VPMOVQB__, + "vpmovqd" : __asm_proxy_VPMOVQD__, + "vpmovqw" : __asm_proxy_VPMOVQW__, + "vpmovsdb" : __asm_proxy_VPMOVSDB__, + "vpmovsdw" : __asm_proxy_VPMOVSDW__, + "vpmovsqb" : __asm_proxy_VPMOVSQB__, + "vpmovsqd" : __asm_proxy_VPMOVSQD__, + "vpmovsqw" : __asm_proxy_VPMOVSQW__, + "vpmovswb" : __asm_proxy_VPMOVSWB__, + "vpmovsxbd" : __asm_proxy_VPMOVSXBD__, + "vpmovsxbq" : __asm_proxy_VPMOVSXBQ__, + "vpmovsxbw" : __asm_proxy_VPMOVSXBW__, + "vpmovsxdq" : __asm_proxy_VPMOVSXDQ__, + "vpmovsxwd" : __asm_proxy_VPMOVSXWD__, + "vpmovsxwq" : __asm_proxy_VPMOVSXWQ__, + "vpmovusdb" : __asm_proxy_VPMOVUSDB__, + "vpmovusdw" : __asm_proxy_VPMOVUSDW__, + "vpmovusqb" : __asm_proxy_VPMOVUSQB__, + "vpmovusqd" : __asm_proxy_VPMOVUSQD__, + "vpmovusqw" : __asm_proxy_VPMOVUSQW__, + "vpmovuswb" : __asm_proxy_VPMOVUSWB__, + "vpmovw2m" : __asm_proxy_VPMOVW2M__, + "vpmovwb" : __asm_proxy_VPMOVWB__, + "vpmovzxbd" : __asm_proxy_VPMOVZXBD__, + "vpmovzxbq" : __asm_proxy_VPMOVZXBQ__, + "vpmovzxbw" : __asm_proxy_VPMOVZXBW__, + "vpmovzxdq" : __asm_proxy_VPMOVZXDQ__, + "vpmovzxwd" : __asm_proxy_VPMOVZXWD__, + "vpmovzxwq" : __asm_proxy_VPMOVZXWQ__, + "vpmuldq" : __asm_proxy_VPMULDQ__, + "vpmulhrsw" : __asm_proxy_VPMULHRSW__, + "vpmulhuw" : __asm_proxy_VPMULHUW__, + "vpmulhw" : __asm_proxy_VPMULHW__, + "vpmulld" : __asm_proxy_VPMULLD__, + "vpmullq" : __asm_proxy_VPMULLQ__, + "vpmullw" : __asm_proxy_VPMULLW__, + "vpmultishiftqb" : __asm_proxy_VPMULTISHIFTQB__, + "vpmuludq" : __asm_proxy_VPMULUDQ__, + "vpopcntd" : __asm_proxy_VPOPCNTD__, + "vpopcntq" : __asm_proxy_VPOPCNTQ__, + "vpor" : __asm_proxy_VPOR__, + "vpord" : __asm_proxy_VPORD__, + "vporq" : __asm_proxy_VPORQ__, + "vpperm" : __asm_proxy_VPPERM__, + "vprold" : __asm_proxy_VPROLD__, + "vprolq" : __asm_proxy_VPROLQ__, + "vprolvd" : __asm_proxy_VPROLVD__, + "vprolvq" : __asm_proxy_VPROLVQ__, + "vprord" : __asm_proxy_VPRORD__, + "vprorq" : __asm_proxy_VPRORQ__, + "vprorvd" : __asm_proxy_VPRORVD__, + "vprorvq" : __asm_proxy_VPRORVQ__, + "vprotb" : __asm_proxy_VPROTB__, + "vprotd" : __asm_proxy_VPROTD__, + "vprotq" : __asm_proxy_VPROTQ__, + "vprotw" : __asm_proxy_VPROTW__, + "vpsadbw" : __asm_proxy_VPSADBW__, + "vpscatterdd" : __asm_proxy_VPSCATTERDD__, + "vpscatterdq" : __asm_proxy_VPSCATTERDQ__, + "vpscatterqd" : __asm_proxy_VPSCATTERQD__, + "vpscatterqq" : __asm_proxy_VPSCATTERQQ__, + "vpshab" : __asm_proxy_VPSHAB__, + "vpshad" : __asm_proxy_VPSHAD__, + "vpshaq" : __asm_proxy_VPSHAQ__, + "vpshaw" : __asm_proxy_VPSHAW__, + "vpshlb" : __asm_proxy_VPSHLB__, + "vpshld" : __asm_proxy_VPSHLD__, + "vpshlq" : __asm_proxy_VPSHLQ__, + "vpshlw" : __asm_proxy_VPSHLW__, + "vpshufb" : __asm_proxy_VPSHUFB__, + "vpshufd" : __asm_proxy_VPSHUFD__, + "vpshufhw" : __asm_proxy_VPSHUFHW__, + "vpshuflw" : __asm_proxy_VPSHUFLW__, + "vpsignb" : __asm_proxy_VPSIGNB__, + "vpsignd" : __asm_proxy_VPSIGND__, + "vpsignw" : __asm_proxy_VPSIGNW__, + "vpslld" : __asm_proxy_VPSLLD__, + "vpslldq" : __asm_proxy_VPSLLDQ__, + "vpsllq" : __asm_proxy_VPSLLQ__, + "vpsllvd" : __asm_proxy_VPSLLVD__, + "vpsllvq" : __asm_proxy_VPSLLVQ__, + "vpsllvw" : __asm_proxy_VPSLLVW__, + "vpsllw" : __asm_proxy_VPSLLW__, + "vpsrad" : __asm_proxy_VPSRAD__, + "vpsraq" : __asm_proxy_VPSRAQ__, + "vpsravd" : __asm_proxy_VPSRAVD__, + "vpsravq" : __asm_proxy_VPSRAVQ__, + "vpsravw" : __asm_proxy_VPSRAVW__, + "vpsraw" : __asm_proxy_VPSRAW__, + "vpsrld" : __asm_proxy_VPSRLD__, + "vpsrldq" : 
__asm_proxy_VPSRLDQ__, + "vpsrlq" : __asm_proxy_VPSRLQ__, + "vpsrlvd" : __asm_proxy_VPSRLVD__, + "vpsrlvq" : __asm_proxy_VPSRLVQ__, + "vpsrlvw" : __asm_proxy_VPSRLVW__, + "vpsrlw" : __asm_proxy_VPSRLW__, + "vpsubb" : __asm_proxy_VPSUBB__, + "vpsubd" : __asm_proxy_VPSUBD__, + "vpsubq" : __asm_proxy_VPSUBQ__, + "vpsubsb" : __asm_proxy_VPSUBSB__, + "vpsubsw" : __asm_proxy_VPSUBSW__, + "vpsubusb" : __asm_proxy_VPSUBUSB__, + "vpsubusw" : __asm_proxy_VPSUBUSW__, + "vpsubw" : __asm_proxy_VPSUBW__, + "vpternlogd" : __asm_proxy_VPTERNLOGD__, + "vpternlogq" : __asm_proxy_VPTERNLOGQ__, + "vptest" : __asm_proxy_VPTEST__, + "vptestmb" : __asm_proxy_VPTESTMB__, + "vptestmd" : __asm_proxy_VPTESTMD__, + "vptestmq" : __asm_proxy_VPTESTMQ__, + "vptestmw" : __asm_proxy_VPTESTMW__, + "vptestnmb" : __asm_proxy_VPTESTNMB__, + "vptestnmd" : __asm_proxy_VPTESTNMD__, + "vptestnmq" : __asm_proxy_VPTESTNMQ__, + "vptestnmw" : __asm_proxy_VPTESTNMW__, + "vpunpckhbw" : __asm_proxy_VPUNPCKHBW__, + "vpunpckhdq" : __asm_proxy_VPUNPCKHDQ__, + "vpunpckhqdq" : __asm_proxy_VPUNPCKHQDQ__, + "vpunpckhwd" : __asm_proxy_VPUNPCKHWD__, + "vpunpcklbw" : __asm_proxy_VPUNPCKLBW__, + "vpunpckldq" : __asm_proxy_VPUNPCKLDQ__, + "vpunpcklqdq" : __asm_proxy_VPUNPCKLQDQ__, + "vpunpcklwd" : __asm_proxy_VPUNPCKLWD__, + "vpxor" : __asm_proxy_VPXOR__, + "vpxord" : __asm_proxy_VPXORD__, + "vpxorq" : __asm_proxy_VPXORQ__, + "vrangepd" : __asm_proxy_VRANGEPD__, + "vrangeps" : __asm_proxy_VRANGEPS__, + "vrangesd" : __asm_proxy_VRANGESD__, + "vrangess" : __asm_proxy_VRANGESS__, + "vrcp14pd" : __asm_proxy_VRCP14PD__, + "vrcp14ps" : __asm_proxy_VRCP14PS__, + "vrcp14sd" : __asm_proxy_VRCP14SD__, + "vrcp14ss" : __asm_proxy_VRCP14SS__, + "vrcp28pd" : __asm_proxy_VRCP28PD__, + "vrcp28ps" : __asm_proxy_VRCP28PS__, + "vrcp28sd" : __asm_proxy_VRCP28SD__, + "vrcp28ss" : __asm_proxy_VRCP28SS__, + "vrcpps" : __asm_proxy_VRCPPS__, + "vrcpss" : __asm_proxy_VRCPSS__, + "vreducepd" : __asm_proxy_VREDUCEPD__, + "vreduceps" : __asm_proxy_VREDUCEPS__, + "vreducesd" : __asm_proxy_VREDUCESD__, + "vreducess" : __asm_proxy_VREDUCESS__, + "vrndscalepd" : __asm_proxy_VRNDSCALEPD__, + "vrndscaleps" : __asm_proxy_VRNDSCALEPS__, + "vrndscalesd" : __asm_proxy_VRNDSCALESD__, + "vrndscaless" : __asm_proxy_VRNDSCALESS__, + "vroundpd" : __asm_proxy_VROUNDPD__, + "vroundps" : __asm_proxy_VROUNDPS__, + "vroundsd" : __asm_proxy_VROUNDSD__, + "vroundss" : __asm_proxy_VROUNDSS__, + "vrsqrt14pd" : __asm_proxy_VRSQRT14PD__, + "vrsqrt14ps" : __asm_proxy_VRSQRT14PS__, + "vrsqrt14sd" : __asm_proxy_VRSQRT14SD__, + "vrsqrt14ss" : __asm_proxy_VRSQRT14SS__, + "vrsqrt28pd" : __asm_proxy_VRSQRT28PD__, + "vrsqrt28ps" : __asm_proxy_VRSQRT28PS__, + "vrsqrt28sd" : __asm_proxy_VRSQRT28SD__, + "vrsqrt28ss" : __asm_proxy_VRSQRT28SS__, + "vrsqrtps" : __asm_proxy_VRSQRTPS__, + "vrsqrtss" : __asm_proxy_VRSQRTSS__, + "vscalefpd" : __asm_proxy_VSCALEFPD__, + "vscalefps" : __asm_proxy_VSCALEFPS__, + "vscalefsd" : __asm_proxy_VSCALEFSD__, + "vscalefss" : __asm_proxy_VSCALEFSS__, + "vscatterdpd" : __asm_proxy_VSCATTERDPD__, + "vscatterdps" : __asm_proxy_VSCATTERDPS__, + "vscatterpf0dpd" : __asm_proxy_VSCATTERPF0DPD__, + "vscatterpf0dps" : __asm_proxy_VSCATTERPF0DPS__, + "vscatterpf0qpd" : __asm_proxy_VSCATTERPF0QPD__, + "vscatterpf0qps" : __asm_proxy_VSCATTERPF0QPS__, + "vscatterpf1dpd" : __asm_proxy_VSCATTERPF1DPD__, + "vscatterpf1dps" : __asm_proxy_VSCATTERPF1DPS__, + "vscatterpf1qpd" : __asm_proxy_VSCATTERPF1QPD__, + "vscatterpf1qps" : __asm_proxy_VSCATTERPF1QPS__, + "vscatterqpd" : __asm_proxy_VSCATTERQPD__, 
+ "vscatterqps" : __asm_proxy_VSCATTERQPS__, + "vshuff32x4" : __asm_proxy_VSHUFF32X4__, + "vshuff64x2" : __asm_proxy_VSHUFF64X2__, + "vshufi32x4" : __asm_proxy_VSHUFI32X4__, + "vshufi64x2" : __asm_proxy_VSHUFI64X2__, + "vshufpd" : __asm_proxy_VSHUFPD__, + "vshufps" : __asm_proxy_VSHUFPS__, + "vsqrtpd" : __asm_proxy_VSQRTPD__, + "vsqrtps" : __asm_proxy_VSQRTPS__, + "vsqrtsd" : __asm_proxy_VSQRTSD__, + "vsqrtss" : __asm_proxy_VSQRTSS__, + "vstmxcsr" : __asm_proxy_VSTMXCSR__, + "vsubpd" : __asm_proxy_VSUBPD__, + "vsubps" : __asm_proxy_VSUBPS__, + "vsubsd" : __asm_proxy_VSUBSD__, + "vsubss" : __asm_proxy_VSUBSS__, + "vtestpd" : __asm_proxy_VTESTPD__, + "vtestps" : __asm_proxy_VTESTPS__, + "vucomisd" : __asm_proxy_VUCOMISD__, + "vucomiss" : __asm_proxy_VUCOMISS__, + "vunpckhpd" : __asm_proxy_VUNPCKHPD__, + "vunpckhps" : __asm_proxy_VUNPCKHPS__, + "vunpcklpd" : __asm_proxy_VUNPCKLPD__, + "vunpcklps" : __asm_proxy_VUNPCKLPS__, + "vxorpd" : __asm_proxy_VXORPD__, + "vxorps" : __asm_proxy_VXORPS__, + "vzeroall" : __asm_proxy_VZEROALL__, + "vzeroupper" : __asm_proxy_VZEROUPPER__, + "xaddb" : __asm_proxy_XADDB__, + "xaddl" : __asm_proxy_XADDL__, + "xaddq" : __asm_proxy_XADDQ__, + "xaddw" : __asm_proxy_XADDW__, + "xchgb" : __asm_proxy_XCHGB__, + "xchgl" : __asm_proxy_XCHGL__, + "xchgq" : __asm_proxy_XCHGQ__, + "xchgw" : __asm_proxy_XCHGW__, + "xgetbv" : __asm_proxy_XGETBV__, + "xlatb" : __asm_proxy_XLATB__, + "xorb" : __asm_proxy_XORB__, + "xorl" : __asm_proxy_XORL__, + "xorpd" : __asm_proxy_XORPD__, + "xorps" : __asm_proxy_XORPS__, + "xorq" : __asm_proxy_XORQ__, + "xorw" : __asm_proxy_XORW__, +} + +func __asm_proxy_ADCB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADCB(v[0], v[1]) + } else { + panic("instruction ADCB takes exactly 2 operands") + } +} + +func __asm_proxy_ADCL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADCL(v[0], v[1]) + } else { + panic("instruction ADCL takes exactly 2 operands") + } +} + +func __asm_proxy_ADCQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADCQ(v[0], v[1]) + } else { + panic("instruction ADCQ takes exactly 2 operands") + } +} + +func __asm_proxy_ADCW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADCW(v[0], v[1]) + } else { + panic("instruction ADCW takes exactly 2 operands") + } +} + +func __asm_proxy_ADCXL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADCXL(v[0], v[1]) + } else { + panic("instruction ADCXL takes exactly 2 operands") + } +} + +func __asm_proxy_ADCXQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADCXQ(v[0], v[1]) + } else { + panic("instruction ADCXQ takes exactly 2 operands") + } +} + +func __asm_proxy_ADDB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADDB(v[0], v[1]) + } else { + panic("instruction ADDB takes exactly 2 operands") + } +} + +func __asm_proxy_ADDL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADDL(v[0], v[1]) + } else { + panic("instruction ADDL takes exactly 2 operands") + } +} + +func __asm_proxy_ADDPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADDPD(v[0], v[1]) + } else { + panic("instruction ADDPD takes exactly 2 operands") + } +} + +func __asm_proxy_ADDPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADDPS(v[0], v[1]) + } else { + panic("instruction ADDPS takes exactly 2 operands") + } +} + +func __asm_proxy_ADDQ__(p 
*Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADDQ(v[0], v[1]) + } else { + panic("instruction ADDQ takes exactly 2 operands") + } +} + +func __asm_proxy_ADDSD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADDSD(v[0], v[1]) + } else { + panic("instruction ADDSD takes exactly 2 operands") + } +} + +func __asm_proxy_ADDSS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADDSS(v[0], v[1]) + } else { + panic("instruction ADDSS takes exactly 2 operands") + } +} + +func __asm_proxy_ADDSUBPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADDSUBPD(v[0], v[1]) + } else { + panic("instruction ADDSUBPD takes exactly 2 operands") + } +} + +func __asm_proxy_ADDSUBPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADDSUBPS(v[0], v[1]) + } else { + panic("instruction ADDSUBPS takes exactly 2 operands") + } +} + +func __asm_proxy_ADDW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADDW(v[0], v[1]) + } else { + panic("instruction ADDW takes exactly 2 operands") + } +} + +func __asm_proxy_ADOXL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADOXL(v[0], v[1]) + } else { + panic("instruction ADOXL takes exactly 2 operands") + } +} + +func __asm_proxy_ADOXQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ADOXQ(v[0], v[1]) + } else { + panic("instruction ADOXQ takes exactly 2 operands") + } +} + +func __asm_proxy_AESDEC__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.AESDEC(v[0], v[1]) + } else { + panic("instruction AESDEC takes exactly 2 operands") + } +} + +func __asm_proxy_AESDECLAST__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.AESDECLAST(v[0], v[1]) + } else { + panic("instruction AESDECLAST takes exactly 2 operands") + } +} + +func __asm_proxy_AESENC__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.AESENC(v[0], v[1]) + } else { + panic("instruction AESENC takes exactly 2 operands") + } +} + +func __asm_proxy_AESENCLAST__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.AESENCLAST(v[0], v[1]) + } else { + panic("instruction AESENCLAST takes exactly 2 operands") + } +} + +func __asm_proxy_AESIMC__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.AESIMC(v[0], v[1]) + } else { + panic("instruction AESIMC takes exactly 2 operands") + } +} + +func __asm_proxy_AESKEYGENASSIST__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.AESKEYGENASSIST(v[0], v[1], v[2]) + } else { + panic("instruction AESKEYGENASSIST takes exactly 3 operands") + } +} + +func __asm_proxy_ANDB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ANDB(v[0], v[1]) + } else { + panic("instruction ANDB takes exactly 2 operands") + } +} + +func __asm_proxy_ANDL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ANDL(v[0], v[1]) + } else { + panic("instruction ANDL takes exactly 2 operands") + } +} + +func __asm_proxy_ANDNL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.ANDNL(v[0], v[1], v[2]) + } else { + panic("instruction ANDNL takes exactly 3 operands") + } +} + +func __asm_proxy_ANDNPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ANDNPD(v[0], v[1]) + } else { + panic("instruction ANDNPD takes exactly 2 operands") + } +} + +func 
__asm_proxy_ANDNPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ANDNPS(v[0], v[1]) + } else { + panic("instruction ANDNPS takes exactly 2 operands") + } +} + +func __asm_proxy_ANDNQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.ANDNQ(v[0], v[1], v[2]) + } else { + panic("instruction ANDNQ takes exactly 3 operands") + } +} + +func __asm_proxy_ANDPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ANDPD(v[0], v[1]) + } else { + panic("instruction ANDPD takes exactly 2 operands") + } +} + +func __asm_proxy_ANDPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ANDPS(v[0], v[1]) + } else { + panic("instruction ANDPS takes exactly 2 operands") + } +} + +func __asm_proxy_ANDQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ANDQ(v[0], v[1]) + } else { + panic("instruction ANDQ takes exactly 2 operands") + } +} + +func __asm_proxy_ANDW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.ANDW(v[0], v[1]) + } else { + panic("instruction ANDW takes exactly 2 operands") + } +} + +func __asm_proxy_BEXTR__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.BEXTR(v[0], v[1], v[2]) + } else { + panic("instruction BEXTR takes exactly 3 operands") + } +} + +func __asm_proxy_BLCFILL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BLCFILL(v[0], v[1]) + } else { + panic("instruction BLCFILL takes exactly 2 operands") + } +} + +func __asm_proxy_BLCI__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BLCI(v[0], v[1]) + } else { + panic("instruction BLCI takes exactly 2 operands") + } +} + +func __asm_proxy_BLCIC__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BLCIC(v[0], v[1]) + } else { + panic("instruction BLCIC takes exactly 2 operands") + } +} + +func __asm_proxy_BLCMSK__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BLCMSK(v[0], v[1]) + } else { + panic("instruction BLCMSK takes exactly 2 operands") + } +} + +func __asm_proxy_BLCS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BLCS(v[0], v[1]) + } else { + panic("instruction BLCS takes exactly 2 operands") + } +} + +func __asm_proxy_BLENDPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.BLENDPD(v[0], v[1], v[2]) + } else { + panic("instruction BLENDPD takes exactly 3 operands") + } +} + +func __asm_proxy_BLENDPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.BLENDPS(v[0], v[1], v[2]) + } else { + panic("instruction BLENDPS takes exactly 3 operands") + } +} + +func __asm_proxy_BLENDVPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.BLENDVPD(v[0], v[1], v[2]) + } else { + panic("instruction BLENDVPD takes exactly 3 operands") + } +} + +func __asm_proxy_BLENDVPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.BLENDVPS(v[0], v[1], v[2]) + } else { + panic("instruction BLENDVPS takes exactly 3 operands") + } +} + +func __asm_proxy_BLSFILL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BLSFILL(v[0], v[1]) + } else { + panic("instruction BLSFILL takes exactly 2 operands") + } +} + +func __asm_proxy_BLSI__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BLSI(v[0], v[1]) + } else { + panic("instruction BLSI takes exactly 2 operands") + } +} + +func 
__asm_proxy_BLSIC__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BLSIC(v[0], v[1]) + } else { + panic("instruction BLSIC takes exactly 2 operands") + } +} + +func __asm_proxy_BLSMSK__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BLSMSK(v[0], v[1]) + } else { + panic("instruction BLSMSK takes exactly 2 operands") + } +} + +func __asm_proxy_BLSR__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BLSR(v[0], v[1]) + } else { + panic("instruction BLSR takes exactly 2 operands") + } +} + +func __asm_proxy_BSFL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BSFL(v[0], v[1]) + } else { + panic("instruction BSFL takes exactly 2 operands") + } +} + +func __asm_proxy_BSFQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BSFQ(v[0], v[1]) + } else { + panic("instruction BSFQ takes exactly 2 operands") + } +} + +func __asm_proxy_BSFW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BSFW(v[0], v[1]) + } else { + panic("instruction BSFW takes exactly 2 operands") + } +} + +func __asm_proxy_BSRL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BSRL(v[0], v[1]) + } else { + panic("instruction BSRL takes exactly 2 operands") + } +} + +func __asm_proxy_BSRQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BSRQ(v[0], v[1]) + } else { + panic("instruction BSRQ takes exactly 2 operands") + } +} + +func __asm_proxy_BSRW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BSRW(v[0], v[1]) + } else { + panic("instruction BSRW takes exactly 2 operands") + } +} + +func __asm_proxy_BSWAPL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.BSWAPL(v[0]) + } else { + panic("instruction BSWAPL takes exactly 1 operand") + } +} + +func __asm_proxy_BSWAPQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.BSWAPQ(v[0]) + } else { + panic("instruction BSWAPQ takes exactly 1 operand") + } +} + +func __asm_proxy_BTCL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BTCL(v[0], v[1]) + } else { + panic("instruction BTCL takes exactly 2 operands") + } +} + +func __asm_proxy_BTCQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BTCQ(v[0], v[1]) + } else { + panic("instruction BTCQ takes exactly 2 operands") + } +} + +func __asm_proxy_BTCW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BTCW(v[0], v[1]) + } else { + panic("instruction BTCW takes exactly 2 operands") + } +} + +func __asm_proxy_BTL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BTL(v[0], v[1]) + } else { + panic("instruction BTL takes exactly 2 operands") + } +} + +func __asm_proxy_BTQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BTQ(v[0], v[1]) + } else { + panic("instruction BTQ takes exactly 2 operands") + } +} + +func __asm_proxy_BTRL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BTRL(v[0], v[1]) + } else { + panic("instruction BTRL takes exactly 2 operands") + } +} + +func __asm_proxy_BTRQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BTRQ(v[0], v[1]) + } else { + panic("instruction BTRQ takes exactly 2 operands") + } +} + +func __asm_proxy_BTRW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BTRW(v[0], v[1]) + } else { + 
panic("instruction BTRW takes exactly 2 operands") + } +} + +func __asm_proxy_BTSL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BTSL(v[0], v[1]) + } else { + panic("instruction BTSL takes exactly 2 operands") + } +} + +func __asm_proxy_BTSQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BTSQ(v[0], v[1]) + } else { + panic("instruction BTSQ takes exactly 2 operands") + } +} + +func __asm_proxy_BTSW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BTSW(v[0], v[1]) + } else { + panic("instruction BTSW takes exactly 2 operands") + } +} + +func __asm_proxy_BTW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.BTW(v[0], v[1]) + } else { + panic("instruction BTW takes exactly 2 operands") + } +} + +func __asm_proxy_BZHI__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.BZHI(v[0], v[1], v[2]) + } else { + panic("instruction BZHI takes exactly 3 operands") + } +} + +func __asm_proxy_CALL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.CALL(v[0]) + } else { + panic("instruction CALL takes exactly 1 operand") + } +} + +func __asm_proxy_CALLQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.CALLQ(v[0]) + } else { + panic("instruction CALLQ takes exactly 1 operand") + } +} + +func __asm_proxy_CBTW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 0 { + return p.CBTW() + } else { + panic("instruction CBTW takes no operands") + } +} + +func __asm_proxy_CLC__(p *Program, v ...interface{}) *Instruction { + if len(v) == 0 { + return p.CLC() + } else { + panic("instruction CLC takes no operands") + } +} + +func __asm_proxy_CLD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 0 { + return p.CLD() + } else { + panic("instruction CLD takes no operands") + } +} + +func __asm_proxy_CLFLUSH__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.CLFLUSH(v[0]) + } else { + panic("instruction CLFLUSH takes exactly 1 operand") + } +} + +func __asm_proxy_CLFLUSHOPT__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.CLFLUSHOPT(v[0]) + } else { + panic("instruction CLFLUSHOPT takes exactly 1 operand") + } +} + +func __asm_proxy_CLTD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 0 { + return p.CLTD() + } else { + panic("instruction CLTD takes no operands") + } +} + +func __asm_proxy_CLTQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 0 { + return p.CLTQ() + } else { + panic("instruction CLTQ takes no operands") + } +} + +func __asm_proxy_CLWB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.CLWB(v[0]) + } else { + panic("instruction CLWB takes exactly 1 operand") + } +} + +func __asm_proxy_CLZERO__(p *Program, v ...interface{}) *Instruction { + if len(v) == 0 { + return p.CLZERO() + } else { + panic("instruction CLZERO takes no operands") + } +} + +func __asm_proxy_CMC__(p *Program, v ...interface{}) *Instruction { + if len(v) == 0 { + return p.CMC() + } else { + panic("instruction CMC takes no operands") + } +} + +func __asm_proxy_CMOVA__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVA(v[0], v[1]) + } else { + panic("instruction CMOVA takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVAE__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVAE(v[0], v[1]) + } else { + panic("instruction CMOVAE takes exactly 2 operands") + } +} + 
+func __asm_proxy_CMOVB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVB(v[0], v[1]) + } else { + panic("instruction CMOVB takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVBE__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVBE(v[0], v[1]) + } else { + panic("instruction CMOVBE takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVC__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVC(v[0], v[1]) + } else { + panic("instruction CMOVC takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVE__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVE(v[0], v[1]) + } else { + panic("instruction CMOVE takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVG__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVG(v[0], v[1]) + } else { + panic("instruction CMOVG takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVGE__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVGE(v[0], v[1]) + } else { + panic("instruction CMOVGE takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVL(v[0], v[1]) + } else { + panic("instruction CMOVL takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVLE__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVLE(v[0], v[1]) + } else { + panic("instruction CMOVLE takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVNA__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVNA(v[0], v[1]) + } else { + panic("instruction CMOVNA takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVNAE__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVNAE(v[0], v[1]) + } else { + panic("instruction CMOVNAE takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVNB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVNB(v[0], v[1]) + } else { + panic("instruction CMOVNB takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVNBE__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVNBE(v[0], v[1]) + } else { + panic("instruction CMOVNBE takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVNC__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVNC(v[0], v[1]) + } else { + panic("instruction CMOVNC takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVNE__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVNE(v[0], v[1]) + } else { + panic("instruction CMOVNE takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVNG__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVNG(v[0], v[1]) + } else { + panic("instruction CMOVNG takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVNGE__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVNGE(v[0], v[1]) + } else { + panic("instruction CMOVNGE takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVNL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVNL(v[0], v[1]) + } else { + panic("instruction CMOVNL takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVNLE__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.CMOVNLE(v[0], v[1]) + } else { + panic("instruction CMOVNLE takes exactly 2 operands") + } +} + +func __asm_proxy_CMOVNO__(p 
+func __asm_proxy_CMOVNO__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMOVNO(v[0], v[1])
+    } else {
+        panic("instruction CMOVNO takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMOVNP__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMOVNP(v[0], v[1])
+    } else {
+        panic("instruction CMOVNP takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMOVNS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMOVNS(v[0], v[1])
+    } else {
+        panic("instruction CMOVNS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMOVNZ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMOVNZ(v[0], v[1])
+    } else {
+        panic("instruction CMOVNZ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMOVO__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMOVO(v[0], v[1])
+    } else {
+        panic("instruction CMOVO takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMOVP__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMOVP(v[0], v[1])
+    } else {
+        panic("instruction CMOVP takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMOVPE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMOVPE(v[0], v[1])
+    } else {
+        panic("instruction CMOVPE takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMOVPO__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMOVPO(v[0], v[1])
+    } else {
+        panic("instruction CMOVPO takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMOVS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMOVS(v[0], v[1])
+    } else {
+        panic("instruction CMOVS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMOVZ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMOVZ(v[0], v[1])
+    } else {
+        panic("instruction CMOVZ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMPB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMPB(v[0], v[1])
+    } else {
+        panic("instruction CMPB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMPL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMPL(v[0], v[1])
+    } else {
+        panic("instruction CMPL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMPPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.CMPPD(v[0], v[1], v[2])
+    } else {
+        panic("instruction CMPPD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_CMPPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.CMPPS(v[0], v[1], v[2])
+    } else {
+        panic("instruction CMPPS takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_CMPQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMPQ(v[0], v[1])
+    } else {
+        panic("instruction CMPQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMPSD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.CMPSD(v[0], v[1], v[2])
+    } else {
+        panic("instruction CMPSD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_CMPSS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.CMPSS(v[0], v[1], v[2])
+    } else {
+        panic("instruction CMPSS takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_CMPW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMPW(v[0], v[1])
+    } else {
+        panic("instruction CMPW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMPXCHG16B__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.CMPXCHG16B(v[0])
+    } else {
+        panic("instruction CMPXCHG16B takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_CMPXCHG8B__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.CMPXCHG8B(v[0])
+    } else {
+        panic("instruction CMPXCHG8B takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_CMPXCHGB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMPXCHGB(v[0], v[1])
+    } else {
+        panic("instruction CMPXCHGB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMPXCHGL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMPXCHGL(v[0], v[1])
+    } else {
+        panic("instruction CMPXCHGL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMPXCHGQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMPXCHGQ(v[0], v[1])
+    } else {
+        panic("instruction CMPXCHGQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CMPXCHGW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CMPXCHGW(v[0], v[1])
+    } else {
+        panic("instruction CMPXCHGW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_COMISD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.COMISD(v[0], v[1])
+    } else {
+        panic("instruction COMISD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_COMISS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.COMISS(v[0], v[1])
+    } else {
+        panic("instruction COMISS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CPUID__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.CPUID()
+    } else {
+        panic("instruction CPUID takes no operands")
+    }
+}
+
+func __asm_proxy_CQTO__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.CQTO()
+    } else {
+        panic("instruction CQTO takes no operands")
+    }
+}
+
+func __asm_proxy_CRC32B__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CRC32B(v[0], v[1])
+    } else {
+        panic("instruction CRC32B takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CRC32L__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CRC32L(v[0], v[1])
+    } else {
+        panic("instruction CRC32L takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CRC32Q__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CRC32Q(v[0], v[1])
+    } else {
+        panic("instruction CRC32Q takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CRC32W__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CRC32W(v[0], v[1])
+    } else {
+        panic("instruction CRC32W takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTDQ2PD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTDQ2PD(v[0], v[1])
+    } else {
+        panic("instruction CVTDQ2PD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTDQ2PS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTDQ2PS(v[0], v[1])
+    } else {
+        panic("instruction CVTDQ2PS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTPD2DQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTPD2DQ(v[0], v[1])
+    } else {
+        panic("instruction CVTPD2DQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTPD2PI__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTPD2PI(v[0], v[1])
+    } else {
+        panic("instruction CVTPD2PI takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTPD2PS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTPD2PS(v[0], v[1])
+    } else {
+        panic("instruction CVTPD2PS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTPI2PD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTPI2PD(v[0], v[1])
+    } else {
+        panic("instruction CVTPI2PD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTPI2PS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTPI2PS(v[0], v[1])
+    } else {
+        panic("instruction CVTPI2PS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTPS2DQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTPS2DQ(v[0], v[1])
+    } else {
+        panic("instruction CVTPS2DQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTPS2PD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTPS2PD(v[0], v[1])
+    } else {
+        panic("instruction CVTPS2PD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTPS2PI__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTPS2PI(v[0], v[1])
+    } else {
+        panic("instruction CVTPS2PI takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTSD2SI__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTSD2SI(v[0], v[1])
+    } else {
+        panic("instruction CVTSD2SI takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTSD2SS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTSD2SS(v[0], v[1])
+    } else {
+        panic("instruction CVTSD2SS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTSI2SD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTSI2SD(v[0], v[1])
+    } else {
+        panic("instruction CVTSI2SD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTSI2SS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTSI2SS(v[0], v[1])
+    } else {
+        panic("instruction CVTSI2SS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTSS2SD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTSS2SD(v[0], v[1])
+    } else {
+        panic("instruction CVTSS2SD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTSS2SI__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTSS2SI(v[0], v[1])
+    } else {
+        panic("instruction CVTSS2SI takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTTPD2DQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTTPD2DQ(v[0], v[1])
+    } else {
+        panic("instruction CVTTPD2DQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTTPD2PI__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTTPD2PI(v[0], v[1])
+    } else {
+        panic("instruction CVTTPD2PI takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTTPS2DQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTTPS2DQ(v[0], v[1])
+    } else {
+        panic("instruction CVTTPS2DQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTTPS2PI__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTTPS2PI(v[0], v[1])
+    } else {
+        panic("instruction CVTTPS2PI takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTTSD2SI__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTTSD2SI(v[0], v[1])
+    } else {
+        panic("instruction CVTTSD2SI takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CVTTSS2SI__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.CVTTSS2SI(v[0], v[1])
+    } else {
+        panic("instruction CVTTSS2SI takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_CWTD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.CWTD()
+    } else {
+        panic("instruction CWTD takes no operands")
+    }
+}
+
+func __asm_proxy_CWTL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.CWTL()
+    } else {
+        panic("instruction CWTL takes no operands")
+    }
+}
+
+func __asm_proxy_DECB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.DECB(v[0])
+    } else {
+        panic("instruction DECB takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_DECL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.DECL(v[0])
+    } else {
+        panic("instruction DECL takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_DECQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.DECQ(v[0])
+    } else {
+        panic("instruction DECQ takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_DECW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.DECW(v[0])
+    } else {
+        panic("instruction DECW takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_DIVB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.DIVB(v[0])
+    } else {
+        panic("instruction DIVB takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_DIVL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.DIVL(v[0])
+    } else {
+        panic("instruction DIVL takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_DIVPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.DIVPD(v[0], v[1])
+    } else {
+        panic("instruction DIVPD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_DIVPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.DIVPS(v[0], v[1])
+    } else {
+        panic("instruction DIVPS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_DIVQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.DIVQ(v[0])
+    } else {
+        panic("instruction DIVQ takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_DIVSD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.DIVSD(v[0], v[1])
+    } else {
+        panic("instruction DIVSD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_DIVSS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.DIVSS(v[0], v[1])
+    } else {
+        panic("instruction DIVSS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_DIVW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.DIVW(v[0])
+    } else {
+        panic("instruction DIVW takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_DPPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.DPPD(v[0], v[1], v[2])
+    } else {
+        panic("instruction DPPD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_DPPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.DPPS(v[0], v[1], v[2])
+    } else {
+        panic("instruction DPPS takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_EMMS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.EMMS()
+    } else {
+        panic("instruction EMMS takes no operands")
+    }
+}
+
+func __asm_proxy_EXTRACTPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.EXTRACTPS(v[0], v[1], v[2])
+    } else {
+        panic("instruction EXTRACTPS takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_EXTRQ__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+        case 2 : return p.EXTRQ(v[0], v[1])
+        case 3 : return p.EXTRQ(v[0], v[1], v[2])
+        default : panic("instruction EXTRQ takes 2 or 3 operands")
+    }
+}
+
panic("instruction EXTRQ takes 2 or 3 operands") + } +} + +func __asm_proxy_FEMMS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 0 { + return p.FEMMS() + } else { + panic("instruction FEMMS takes no operands") + } +} + +func __asm_proxy_HADDPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.HADDPD(v[0], v[1]) + } else { + panic("instruction HADDPD takes exactly 2 operands") + } +} + +func __asm_proxy_HADDPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.HADDPS(v[0], v[1]) + } else { + panic("instruction HADDPS takes exactly 2 operands") + } +} + +func __asm_proxy_HSUBPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.HSUBPD(v[0], v[1]) + } else { + panic("instruction HSUBPD takes exactly 2 operands") + } +} + +func __asm_proxy_HSUBPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.HSUBPS(v[0], v[1]) + } else { + panic("instruction HSUBPS takes exactly 2 operands") + } +} + +func __asm_proxy_IDIVB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.IDIVB(v[0]) + } else { + panic("instruction IDIVB takes exactly 1 operand") + } +} + +func __asm_proxy_IDIVL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.IDIVL(v[0]) + } else { + panic("instruction IDIVL takes exactly 1 operand") + } +} + +func __asm_proxy_IDIVQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.IDIVQ(v[0]) + } else { + panic("instruction IDIVQ takes exactly 1 operand") + } +} + +func __asm_proxy_IDIVW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.IDIVW(v[0]) + } else { + panic("instruction IDIVW takes exactly 1 operand") + } +} + +func __asm_proxy_IMULB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.IMULB(v[0]) + } else { + panic("instruction IMULB takes exactly 1 operand") + } +} + +func __asm_proxy_IMULL__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 1 : return p.IMULL(v[0]) + case 2 : return p.IMULL(v[0], v[1]) + case 3 : return p.IMULL(v[0], v[1], v[2]) + default : panic("instruction IMULL takes 1 or 2 or 3 operands") + } +} + +func __asm_proxy_IMULQ__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 1 : return p.IMULQ(v[0]) + case 2 : return p.IMULQ(v[0], v[1]) + case 3 : return p.IMULQ(v[0], v[1], v[2]) + default : panic("instruction IMULQ takes 1 or 2 or 3 operands") + } +} + +func __asm_proxy_IMULW__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 1 : return p.IMULW(v[0]) + case 2 : return p.IMULW(v[0], v[1]) + case 3 : return p.IMULW(v[0], v[1], v[2]) + default : panic("instruction IMULW takes 1 or 2 or 3 operands") + } +} + +func __asm_proxy_INCB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.INCB(v[0]) + } else { + panic("instruction INCB takes exactly 1 operand") + } +} + +func __asm_proxy_INCL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.INCL(v[0]) + } else { + panic("instruction INCL takes exactly 1 operand") + } +} + +func __asm_proxy_INCQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.INCQ(v[0]) + } else { + panic("instruction INCQ takes exactly 1 operand") + } +} + +func __asm_proxy_INCW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.INCW(v[0]) + } else { + panic("instruction INCW takes exactly 1 operand") + } +} + +func __asm_proxy_INSERTPS__(p *Program, 
+func __asm_proxy_INSERTPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.INSERTPS(v[0], v[1], v[2])
+    } else {
+        panic("instruction INSERTPS takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_INSERTQ__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+        case 2 : return p.INSERTQ(v[0], v[1])
+        case 4 : return p.INSERTQ(v[0], v[1], v[2], v[3])
+        default : panic("instruction INSERTQ takes 2 or 4 operands")
+    }
+}
+
+func __asm_proxy_INT__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.INT(v[0])
+    } else {
+        panic("instruction INT takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JA__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JA(v[0])
+    } else {
+        panic("instruction JA takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JAE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JAE(v[0])
+    } else {
+        panic("instruction JAE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JB(v[0])
+    } else {
+        panic("instruction JB takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JBE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JBE(v[0])
+    } else {
+        panic("instruction JBE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JC__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JC(v[0])
+    } else {
+        panic("instruction JC takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JE(v[0])
+    } else {
+        panic("instruction JE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JECXZ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JECXZ(v[0])
+    } else {
+        panic("instruction JECXZ takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JG__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JG(v[0])
+    } else {
+        panic("instruction JG takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JGE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JGE(v[0])
+    } else {
+        panic("instruction JGE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JL(v[0])
+    } else {
+        panic("instruction JL takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JLE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JLE(v[0])
+    } else {
+        panic("instruction JLE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JMP__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JMP(v[0])
+    } else {
+        panic("instruction JMP takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JMPQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JMPQ(v[0])
+    } else {
+        panic("instruction JMPQ takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JNA__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JNA(v[0])
+    } else {
+        panic("instruction JNA takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JNAE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JNAE(v[0])
+    } else {
+        panic("instruction JNAE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JNB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JNB(v[0])
+    } else {
+        panic("instruction JNB takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JNBE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JNBE(v[0])
+    } else {
+        panic("instruction JNBE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JNC__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JNC(v[0])
+    } else {
+        panic("instruction JNC takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JNE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JNE(v[0])
+    } else {
+        panic("instruction JNE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JNG__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JNG(v[0])
+    } else {
+        panic("instruction JNG takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JNGE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JNGE(v[0])
+    } else {
+        panic("instruction JNGE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JNL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JNL(v[0])
+    } else {
+        panic("instruction JNL takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JNLE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JNLE(v[0])
+    } else {
+        panic("instruction JNLE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JNO__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JNO(v[0])
+    } else {
+        panic("instruction JNO takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JNP__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JNP(v[0])
+    } else {
+        panic("instruction JNP takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JNS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JNS(v[0])
+    } else {
+        panic("instruction JNS takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JNZ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JNZ(v[0])
+    } else {
+        panic("instruction JNZ takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JO__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JO(v[0])
+    } else {
+        panic("instruction JO takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JP__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JP(v[0])
+    } else {
+        panic("instruction JP takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JPE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JPE(v[0])
+    } else {
+        panic("instruction JPE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JPO__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JPO(v[0])
+    } else {
+        panic("instruction JPO takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JRCXZ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JRCXZ(v[0])
+    } else {
+        panic("instruction JRCXZ takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JS(v[0])
+    } else {
+        panic("instruction JS takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_JZ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.JZ(v[0])
+    } else {
+        panic("instruction JZ takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_KADDB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.KADDB(v[0], v[1], v[2])
+    } else {
+        panic("instruction KADDB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_KADDD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.KADDD(v[0], v[1], v[2])
+    } else {
+        panic("instruction KADDD takes exactly 3 operands")
+    }
+}
+
operands") + } +} + +func __asm_proxy_KADDQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KADDQ(v[0], v[1], v[2]) + } else { + panic("instruction KADDQ takes exactly 3 operands") + } +} + +func __asm_proxy_KADDW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KADDW(v[0], v[1], v[2]) + } else { + panic("instruction KADDW takes exactly 3 operands") + } +} + +func __asm_proxy_KANDB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KANDB(v[0], v[1], v[2]) + } else { + panic("instruction KANDB takes exactly 3 operands") + } +} + +func __asm_proxy_KANDD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KANDD(v[0], v[1], v[2]) + } else { + panic("instruction KANDD takes exactly 3 operands") + } +} + +func __asm_proxy_KANDNB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KANDNB(v[0], v[1], v[2]) + } else { + panic("instruction KANDNB takes exactly 3 operands") + } +} + +func __asm_proxy_KANDND__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KANDND(v[0], v[1], v[2]) + } else { + panic("instruction KANDND takes exactly 3 operands") + } +} + +func __asm_proxy_KANDNQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KANDNQ(v[0], v[1], v[2]) + } else { + panic("instruction KANDNQ takes exactly 3 operands") + } +} + +func __asm_proxy_KANDNW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KANDNW(v[0], v[1], v[2]) + } else { + panic("instruction KANDNW takes exactly 3 operands") + } +} + +func __asm_proxy_KANDQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KANDQ(v[0], v[1], v[2]) + } else { + panic("instruction KANDQ takes exactly 3 operands") + } +} + +func __asm_proxy_KANDW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KANDW(v[0], v[1], v[2]) + } else { + panic("instruction KANDW takes exactly 3 operands") + } +} + +func __asm_proxy_KMOVB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.KMOVB(v[0], v[1]) + } else { + panic("instruction KMOVB takes exactly 2 operands") + } +} + +func __asm_proxy_KMOVD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.KMOVD(v[0], v[1]) + } else { + panic("instruction KMOVD takes exactly 2 operands") + } +} + +func __asm_proxy_KMOVQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.KMOVQ(v[0], v[1]) + } else { + panic("instruction KMOVQ takes exactly 2 operands") + } +} + +func __asm_proxy_KMOVW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.KMOVW(v[0], v[1]) + } else { + panic("instruction KMOVW takes exactly 2 operands") + } +} + +func __asm_proxy_KNOTB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.KNOTB(v[0], v[1]) + } else { + panic("instruction KNOTB takes exactly 2 operands") + } +} + +func __asm_proxy_KNOTD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.KNOTD(v[0], v[1]) + } else { + panic("instruction KNOTD takes exactly 2 operands") + } +} + +func __asm_proxy_KNOTQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.KNOTQ(v[0], v[1]) + } else { + panic("instruction KNOTQ takes exactly 2 operands") + } +} + +func __asm_proxy_KNOTW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.KNOTW(v[0], v[1]) + } else { + panic("instruction KNOTW takes exactly 2 
operands") + } +} + +func __asm_proxy_KORB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KORB(v[0], v[1], v[2]) + } else { + panic("instruction KORB takes exactly 3 operands") + } +} + +func __asm_proxy_KORD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KORD(v[0], v[1], v[2]) + } else { + panic("instruction KORD takes exactly 3 operands") + } +} + +func __asm_proxy_KORQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KORQ(v[0], v[1], v[2]) + } else { + panic("instruction KORQ takes exactly 3 operands") + } +} + +func __asm_proxy_KORTESTB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.KORTESTB(v[0], v[1]) + } else { + panic("instruction KORTESTB takes exactly 2 operands") + } +} + +func __asm_proxy_KORTESTD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.KORTESTD(v[0], v[1]) + } else { + panic("instruction KORTESTD takes exactly 2 operands") + } +} + +func __asm_proxy_KORTESTQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.KORTESTQ(v[0], v[1]) + } else { + panic("instruction KORTESTQ takes exactly 2 operands") + } +} + +func __asm_proxy_KORTESTW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.KORTESTW(v[0], v[1]) + } else { + panic("instruction KORTESTW takes exactly 2 operands") + } +} + +func __asm_proxy_KORW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KORW(v[0], v[1], v[2]) + } else { + panic("instruction KORW takes exactly 3 operands") + } +} + +func __asm_proxy_KSHIFTLB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KSHIFTLB(v[0], v[1], v[2]) + } else { + panic("instruction KSHIFTLB takes exactly 3 operands") + } +} + +func __asm_proxy_KSHIFTLD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KSHIFTLD(v[0], v[1], v[2]) + } else { + panic("instruction KSHIFTLD takes exactly 3 operands") + } +} + +func __asm_proxy_KSHIFTLQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KSHIFTLQ(v[0], v[1], v[2]) + } else { + panic("instruction KSHIFTLQ takes exactly 3 operands") + } +} + +func __asm_proxy_KSHIFTLW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KSHIFTLW(v[0], v[1], v[2]) + } else { + panic("instruction KSHIFTLW takes exactly 3 operands") + } +} + +func __asm_proxy_KSHIFTRB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KSHIFTRB(v[0], v[1], v[2]) + } else { + panic("instruction KSHIFTRB takes exactly 3 operands") + } +} + +func __asm_proxy_KSHIFTRD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KSHIFTRD(v[0], v[1], v[2]) + } else { + panic("instruction KSHIFTRD takes exactly 3 operands") + } +} + +func __asm_proxy_KSHIFTRQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KSHIFTRQ(v[0], v[1], v[2]) + } else { + panic("instruction KSHIFTRQ takes exactly 3 operands") + } +} + +func __asm_proxy_KSHIFTRW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.KSHIFTRW(v[0], v[1], v[2]) + } else { + panic("instruction KSHIFTRW takes exactly 3 operands") + } +} + +func __asm_proxy_KTESTB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.KTESTB(v[0], v[1]) + } else { + panic("instruction KTESTB takes exactly 2 operands") + } +} + +func __asm_proxy_KTESTD__(p *Program, v ...interface{}) *Instruction { + if 
+func __asm_proxy_KTESTD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.KTESTD(v[0], v[1])
+    } else {
+        panic("instruction KTESTD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_KTESTQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.KTESTQ(v[0], v[1])
+    } else {
+        panic("instruction KTESTQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_KTESTW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.KTESTW(v[0], v[1])
+    } else {
+        panic("instruction KTESTW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_KUNPCKBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.KUNPCKBW(v[0], v[1], v[2])
+    } else {
+        panic("instruction KUNPCKBW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_KUNPCKDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.KUNPCKDQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction KUNPCKDQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_KUNPCKWD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.KUNPCKWD(v[0], v[1], v[2])
+    } else {
+        panic("instruction KUNPCKWD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_KXNORB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.KXNORB(v[0], v[1], v[2])
+    } else {
+        panic("instruction KXNORB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_KXNORD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.KXNORD(v[0], v[1], v[2])
+    } else {
+        panic("instruction KXNORD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_KXNORQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.KXNORQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction KXNORQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_KXNORW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.KXNORW(v[0], v[1], v[2])
+    } else {
+        panic("instruction KXNORW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_KXORB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.KXORB(v[0], v[1], v[2])
+    } else {
+        panic("instruction KXORB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_KXORD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.KXORD(v[0], v[1], v[2])
+    } else {
+        panic("instruction KXORD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_KXORQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.KXORQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction KXORQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_KXORW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.KXORW(v[0], v[1], v[2])
+    } else {
+        panic("instruction KXORW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_LDDQU__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.LDDQU(v[0], v[1])
+    } else {
+        panic("instruction LDDQU takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_LDMXCSR__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.LDMXCSR(v[0])
+    } else {
+        panic("instruction LDMXCSR takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_LEAL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.LEAL(v[0], v[1])
+    } else {
+        panic("instruction LEAL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_LEAQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.LEAQ(v[0], v[1])
+    } else {
+        panic("instruction LEAQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_LEAW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.LEAW(v[0], v[1])
+    } else {
+        panic("instruction LEAW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_LFENCE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.LFENCE()
+    } else {
+        panic("instruction LFENCE takes no operands")
+    }
+}
+
+func __asm_proxy_LZCNTL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.LZCNTL(v[0], v[1])
+    } else {
+        panic("instruction LZCNTL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_LZCNTQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.LZCNTQ(v[0], v[1])
+    } else {
+        panic("instruction LZCNTQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_LZCNTW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.LZCNTW(v[0], v[1])
+    } else {
+        panic("instruction LZCNTW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MASKMOVDQU__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MASKMOVDQU(v[0], v[1])
+    } else {
+        panic("instruction MASKMOVDQU takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MASKMOVQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MASKMOVQ(v[0], v[1])
+    } else {
+        panic("instruction MASKMOVQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MAXPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MAXPD(v[0], v[1])
+    } else {
+        panic("instruction MAXPD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MAXPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MAXPS(v[0], v[1])
+    } else {
+        panic("instruction MAXPS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MAXSD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MAXSD(v[0], v[1])
+    } else {
+        panic("instruction MAXSD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MAXSS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MAXSS(v[0], v[1])
+    } else {
+        panic("instruction MAXSS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MFENCE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.MFENCE()
+    } else {
+        panic("instruction MFENCE takes no operands")
+    }
+}
+
+func __asm_proxy_MINPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MINPD(v[0], v[1])
+    } else {
+        panic("instruction MINPD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MINPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MINPS(v[0], v[1])
+    } else {
+        panic("instruction MINPS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MINSD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MINSD(v[0], v[1])
+    } else {
+        panic("instruction MINSD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MINSS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MINSS(v[0], v[1])
+    } else {
+        panic("instruction MINSS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MONITOR__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.MONITOR()
+    } else {
+        panic("instruction MONITOR takes no operands")
+    }
+}
+
+func __asm_proxy_MONITORX__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.MONITORX()
+    } else {
+        panic("instruction MONITORX takes no operands")
+    }
+}
+
panic("instruction MOVAPD takes exactly 2 operands") + } +} + +func __asm_proxy_MOVAPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVAPS(v[0], v[1]) + } else { + panic("instruction MOVAPS takes exactly 2 operands") + } +} + +func __asm_proxy_MOVB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVB(v[0], v[1]) + } else { + panic("instruction MOVB takes exactly 2 operands") + } +} + +func __asm_proxy_MOVBEL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVBEL(v[0], v[1]) + } else { + panic("instruction MOVBEL takes exactly 2 operands") + } +} + +func __asm_proxy_MOVBEQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVBEQ(v[0], v[1]) + } else { + panic("instruction MOVBEQ takes exactly 2 operands") + } +} + +func __asm_proxy_MOVBEW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVBEW(v[0], v[1]) + } else { + panic("instruction MOVBEW takes exactly 2 operands") + } +} + +func __asm_proxy_MOVD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVD(v[0], v[1]) + } else { + panic("instruction MOVD takes exactly 2 operands") + } +} + +func __asm_proxy_MOVDDUP__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVDDUP(v[0], v[1]) + } else { + panic("instruction MOVDDUP takes exactly 2 operands") + } +} + +func __asm_proxy_MOVDQ2Q__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVDQ2Q(v[0], v[1]) + } else { + panic("instruction MOVDQ2Q takes exactly 2 operands") + } +} + +func __asm_proxy_MOVDQA__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVDQA(v[0], v[1]) + } else { + panic("instruction MOVDQA takes exactly 2 operands") + } +} + +func __asm_proxy_MOVDQU__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVDQU(v[0], v[1]) + } else { + panic("instruction MOVDQU takes exactly 2 operands") + } +} + +func __asm_proxy_MOVHLPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVHLPS(v[0], v[1]) + } else { + panic("instruction MOVHLPS takes exactly 2 operands") + } +} + +func __asm_proxy_MOVHPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVHPD(v[0], v[1]) + } else { + panic("instruction MOVHPD takes exactly 2 operands") + } +} + +func __asm_proxy_MOVHPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVHPS(v[0], v[1]) + } else { + panic("instruction MOVHPS takes exactly 2 operands") + } +} + +func __asm_proxy_MOVL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVL(v[0], v[1]) + } else { + panic("instruction MOVL takes exactly 2 operands") + } +} + +func __asm_proxy_MOVLHPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVLHPS(v[0], v[1]) + } else { + panic("instruction MOVLHPS takes exactly 2 operands") + } +} + +func __asm_proxy_MOVLPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVLPD(v[0], v[1]) + } else { + panic("instruction MOVLPD takes exactly 2 operands") + } +} + +func __asm_proxy_MOVLPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVLPS(v[0], v[1]) + } else { + panic("instruction MOVLPS takes exactly 2 operands") + } +} + +func __asm_proxy_MOVMSKPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVMSKPD(v[0], v[1]) + } else { + panic("instruction MOVMSKPD 
+func __asm_proxy_MOVMSKPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVMSKPD(v[0], v[1])
+    } else {
+        panic("instruction MOVMSKPD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVMSKPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVMSKPS(v[0], v[1])
+    } else {
+        panic("instruction MOVMSKPS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVNTDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVNTDQ(v[0], v[1])
+    } else {
+        panic("instruction MOVNTDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVNTDQA__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVNTDQA(v[0], v[1])
+    } else {
+        panic("instruction MOVNTDQA takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVNTIL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVNTIL(v[0], v[1])
+    } else {
+        panic("instruction MOVNTIL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVNTIQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVNTIQ(v[0], v[1])
+    } else {
+        panic("instruction MOVNTIQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVNTPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVNTPD(v[0], v[1])
+    } else {
+        panic("instruction MOVNTPD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVNTPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVNTPS(v[0], v[1])
+    } else {
+        panic("instruction MOVNTPS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVNTQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVNTQ(v[0], v[1])
+    } else {
+        panic("instruction MOVNTQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVNTSD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVNTSD(v[0], v[1])
+    } else {
+        panic("instruction MOVNTSD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVNTSS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVNTSS(v[0], v[1])
+    } else {
+        panic("instruction MOVNTSS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVQ(v[0], v[1])
+    } else {
+        panic("instruction MOVQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVQ2DQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVQ2DQ(v[0], v[1])
+    } else {
+        panic("instruction MOVQ2DQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVSBL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVSBL(v[0], v[1])
+    } else {
+        panic("instruction MOVSBL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVSBQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVSBQ(v[0], v[1])
+    } else {
+        panic("instruction MOVSBQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVSBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVSBW(v[0], v[1])
+    } else {
+        panic("instruction MOVSBW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVSD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVSD(v[0], v[1])
+    } else {
+        panic("instruction MOVSD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MOVSHDUP__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MOVSHDUP(v[0], v[1])
+    } else {
+        panic("instruction MOVSHDUP takes exactly 2 operands")
+    }
+}
+
panic("instruction MOVSLDUP takes exactly 2 operands") + } +} + +func __asm_proxy_MOVSLQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVSLQ(v[0], v[1]) + } else { + panic("instruction MOVSLQ takes exactly 2 operands") + } +} + +func __asm_proxy_MOVSS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVSS(v[0], v[1]) + } else { + panic("instruction MOVSS takes exactly 2 operands") + } +} + +func __asm_proxy_MOVSWL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVSWL(v[0], v[1]) + } else { + panic("instruction MOVSWL takes exactly 2 operands") + } +} + +func __asm_proxy_MOVSWQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVSWQ(v[0], v[1]) + } else { + panic("instruction MOVSWQ takes exactly 2 operands") + } +} + +func __asm_proxy_MOVUPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVUPD(v[0], v[1]) + } else { + panic("instruction MOVUPD takes exactly 2 operands") + } +} + +func __asm_proxy_MOVUPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVUPS(v[0], v[1]) + } else { + panic("instruction MOVUPS takes exactly 2 operands") + } +} + +func __asm_proxy_MOVW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVW(v[0], v[1]) + } else { + panic("instruction MOVW takes exactly 2 operands") + } +} + +func __asm_proxy_MOVZBL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVZBL(v[0], v[1]) + } else { + panic("instruction MOVZBL takes exactly 2 operands") + } +} + +func __asm_proxy_MOVZBQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVZBQ(v[0], v[1]) + } else { + panic("instruction MOVZBQ takes exactly 2 operands") + } +} + +func __asm_proxy_MOVZBW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVZBW(v[0], v[1]) + } else { + panic("instruction MOVZBW takes exactly 2 operands") + } +} + +func __asm_proxy_MOVZWL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVZWL(v[0], v[1]) + } else { + panic("instruction MOVZWL takes exactly 2 operands") + } +} + +func __asm_proxy_MOVZWQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MOVZWQ(v[0], v[1]) + } else { + panic("instruction MOVZWQ takes exactly 2 operands") + } +} + +func __asm_proxy_MPSADBW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.MPSADBW(v[0], v[1], v[2]) + } else { + panic("instruction MPSADBW takes exactly 3 operands") + } +} + +func __asm_proxy_MULB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.MULB(v[0]) + } else { + panic("instruction MULB takes exactly 1 operand") + } +} + +func __asm_proxy_MULL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.MULL(v[0]) + } else { + panic("instruction MULL takes exactly 1 operand") + } +} + +func __asm_proxy_MULPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MULPD(v[0], v[1]) + } else { + panic("instruction MULPD takes exactly 2 operands") + } +} + +func __asm_proxy_MULPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.MULPS(v[0], v[1]) + } else { + panic("instruction MULPS takes exactly 2 operands") + } +} + +func __asm_proxy_MULQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.MULQ(v[0]) + } else { + panic("instruction MULQ takes exactly 1 operand") + } +} + +func 
+func __asm_proxy_MULSD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MULSD(v[0], v[1])
+    } else {
+        panic("instruction MULSD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MULSS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.MULSS(v[0], v[1])
+    } else {
+        panic("instruction MULSS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_MULW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.MULW(v[0])
+    } else {
+        panic("instruction MULW takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_MULXL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.MULXL(v[0], v[1], v[2])
+    } else {
+        panic("instruction MULXL takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_MULXQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.MULXQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction MULXQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_MWAIT__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.MWAIT()
+    } else {
+        panic("instruction MWAIT takes no operands")
+    }
+}
+
+func __asm_proxy_MWAITX__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.MWAITX()
+    } else {
+        panic("instruction MWAITX takes no operands")
+    }
+}
+
+func __asm_proxy_NEGB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.NEGB(v[0])
+    } else {
+        panic("instruction NEGB takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_NEGL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.NEGL(v[0])
+    } else {
+        panic("instruction NEGL takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_NEGQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.NEGQ(v[0])
+    } else {
+        panic("instruction NEGQ takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_NEGW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.NEGW(v[0])
+    } else {
+        panic("instruction NEGW takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_NOP__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.NOP()
+    } else {
+        panic("instruction NOP takes no operands")
+    }
+}
+
+func __asm_proxy_NOTB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.NOTB(v[0])
+    } else {
+        panic("instruction NOTB takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_NOTL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.NOTL(v[0])
+    } else {
+        panic("instruction NOTL takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_NOTQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.NOTQ(v[0])
+    } else {
+        panic("instruction NOTQ takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_NOTW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.NOTW(v[0])
+    } else {
+        panic("instruction NOTW takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_ORB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.ORB(v[0], v[1])
+    } else {
+        panic("instruction ORB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_ORL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.ORL(v[0], v[1])
+    } else {
+        panic("instruction ORL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_ORPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.ORPD(v[0], v[1])
+    } else {
+        panic("instruction ORPD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_ORPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.ORPS(v[0], v[1])
+    } else {
+        panic("instruction ORPS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_ORQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.ORQ(v[0], v[1])
+    } else {
+        panic("instruction ORQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_ORW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.ORW(v[0], v[1])
+    } else {
+        panic("instruction ORW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PABSB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PABSB(v[0], v[1])
+    } else {
+        panic("instruction PABSB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PABSD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PABSD(v[0], v[1])
+    } else {
+        panic("instruction PABSD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PABSW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PABSW(v[0], v[1])
+    } else {
+        panic("instruction PABSW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PACKSSDW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PACKSSDW(v[0], v[1])
+    } else {
+        panic("instruction PACKSSDW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PACKSSWB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PACKSSWB(v[0], v[1])
+    } else {
+        panic("instruction PACKSSWB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PACKUSDW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PACKUSDW(v[0], v[1])
+    } else {
+        panic("instruction PACKUSDW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PACKUSWB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PACKUSWB(v[0], v[1])
+    } else {
+        panic("instruction PACKUSWB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PADDB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PADDB(v[0], v[1])
+    } else {
+        panic("instruction PADDB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PADDD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PADDD(v[0], v[1])
+    } else {
+        panic("instruction PADDD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PADDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PADDQ(v[0], v[1])
+    } else {
+        panic("instruction PADDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PADDSB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PADDSB(v[0], v[1])
+    } else {
+        panic("instruction PADDSB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PADDSW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PADDSW(v[0], v[1])
+    } else {
+        panic("instruction PADDSW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PADDUSB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PADDUSB(v[0], v[1])
+    } else {
+        panic("instruction PADDUSB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PADDUSW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PADDUSW(v[0], v[1])
+    } else {
+        panic("instruction PADDUSW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PADDW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PADDW(v[0], v[1])
+    } else {
+        panic("instruction PADDW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PALIGNR__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.PALIGNR(v[0], v[1], v[2])
+    } else {
+        panic("instruction PALIGNR takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_PAND__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PAND(v[0], v[1])
+    } else {
+        panic("instruction PAND takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PANDN__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PANDN(v[0], v[1])
+    } else {
+        panic("instruction PANDN takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PAUSE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.PAUSE()
+    } else {
+        panic("instruction PAUSE takes no operands")
+    }
+}
+
+func __asm_proxy_PAVGB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PAVGB(v[0], v[1])
+    } else {
+        panic("instruction PAVGB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PAVGUSB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PAVGUSB(v[0], v[1])
+    } else {
+        panic("instruction PAVGUSB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PAVGW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PAVGW(v[0], v[1])
+    } else {
+        panic("instruction PAVGW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PBLENDVB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.PBLENDVB(v[0], v[1], v[2])
+    } else {
+        panic("instruction PBLENDVB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_PBLENDW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.PBLENDW(v[0], v[1], v[2])
+    } else {
+        panic("instruction PBLENDW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_PCLMULQDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.PCLMULQDQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction PCLMULQDQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_PCMPEQB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PCMPEQB(v[0], v[1])
+    } else {
+        panic("instruction PCMPEQB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PCMPEQD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PCMPEQD(v[0], v[1])
+    } else {
+        panic("instruction PCMPEQD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PCMPEQQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PCMPEQQ(v[0], v[1])
+    } else {
+        panic("instruction PCMPEQQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PCMPEQW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PCMPEQW(v[0], v[1])
+    } else {
+        panic("instruction PCMPEQW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PCMPESTRI__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.PCMPESTRI(v[0], v[1], v[2])
+    } else {
+        panic("instruction PCMPESTRI takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_PCMPESTRM__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.PCMPESTRM(v[0], v[1], v[2])
+    } else {
+        panic("instruction PCMPESTRM takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_PCMPGTB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PCMPGTB(v[0], v[1])
+    } else {
+        panic("instruction PCMPGTB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PCMPGTD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PCMPGTD(v[0], v[1])
+    } else {
+        panic("instruction PCMPGTD takes exactly 2 operands")
+    }
+}
+
...interface{}) *Instruction { + if len(v) == 2 { + return p.PCMPGTQ(v[0], v[1]) + } else { + panic("instruction PCMPGTQ takes exactly 2 operands") + } +} + +func __asm_proxy_PCMPGTW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PCMPGTW(v[0], v[1]) + } else { + panic("instruction PCMPGTW takes exactly 2 operands") + } +} + +func __asm_proxy_PCMPISTRI__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.PCMPISTRI(v[0], v[1], v[2]) + } else { + panic("instruction PCMPISTRI takes exactly 3 operands") + } +} + +func __asm_proxy_PCMPISTRM__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.PCMPISTRM(v[0], v[1], v[2]) + } else { + panic("instruction PCMPISTRM takes exactly 3 operands") + } +} + +func __asm_proxy_PDEP__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.PDEP(v[0], v[1], v[2]) + } else { + panic("instruction PDEP takes exactly 3 operands") + } +} + +func __asm_proxy_PEXT__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.PEXT(v[0], v[1], v[2]) + } else { + panic("instruction PEXT takes exactly 3 operands") + } +} + +func __asm_proxy_PEXTRB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.PEXTRB(v[0], v[1], v[2]) + } else { + panic("instruction PEXTRB takes exactly 3 operands") + } +} + +func __asm_proxy_PEXTRD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.PEXTRD(v[0], v[1], v[2]) + } else { + panic("instruction PEXTRD takes exactly 3 operands") + } +} + +func __asm_proxy_PEXTRQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.PEXTRQ(v[0], v[1], v[2]) + } else { + panic("instruction PEXTRQ takes exactly 3 operands") + } +} + +func __asm_proxy_PEXTRW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.PEXTRW(v[0], v[1], v[2]) + } else { + panic("instruction PEXTRW takes exactly 3 operands") + } +} + +func __asm_proxy_PF2ID__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PF2ID(v[0], v[1]) + } else { + panic("instruction PF2ID takes exactly 2 operands") + } +} + +func __asm_proxy_PF2IW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PF2IW(v[0], v[1]) + } else { + panic("instruction PF2IW takes exactly 2 operands") + } +} + +func __asm_proxy_PFACC__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFACC(v[0], v[1]) + } else { + panic("instruction PFACC takes exactly 2 operands") + } +} + +func __asm_proxy_PFADD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFADD(v[0], v[1]) + } else { + panic("instruction PFADD takes exactly 2 operands") + } +} + +func __asm_proxy_PFCMPEQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFCMPEQ(v[0], v[1]) + } else { + panic("instruction PFCMPEQ takes exactly 2 operands") + } +} + +func __asm_proxy_PFCMPGE__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFCMPGE(v[0], v[1]) + } else { + panic("instruction PFCMPGE takes exactly 2 operands") + } +} + +func __asm_proxy_PFCMPGT__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFCMPGT(v[0], v[1]) + } else { + panic("instruction PFCMPGT takes exactly 2 operands") + } +} + +func __asm_proxy_PFMAX__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFMAX(v[0], v[1]) + } else { + panic("instruction PFMAX takes exactly 2 operands") + } +} + +func 
__asm_proxy_PFMIN__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFMIN(v[0], v[1]) + } else { + panic("instruction PFMIN takes exactly 2 operands") + } +} + +func __asm_proxy_PFMUL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFMUL(v[0], v[1]) + } else { + panic("instruction PFMUL takes exactly 2 operands") + } +} + +func __asm_proxy_PFNACC__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFNACC(v[0], v[1]) + } else { + panic("instruction PFNACC takes exactly 2 operands") + } +} + +func __asm_proxy_PFPNACC__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFPNACC(v[0], v[1]) + } else { + panic("instruction PFPNACC takes exactly 2 operands") + } +} + +func __asm_proxy_PFRCP__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFRCP(v[0], v[1]) + } else { + panic("instruction PFRCP takes exactly 2 operands") + } +} + +func __asm_proxy_PFRCPIT1__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFRCPIT1(v[0], v[1]) + } else { + panic("instruction PFRCPIT1 takes exactly 2 operands") + } +} + +func __asm_proxy_PFRCPIT2__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFRCPIT2(v[0], v[1]) + } else { + panic("instruction PFRCPIT2 takes exactly 2 operands") + } +} + +func __asm_proxy_PFRSQIT1__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFRSQIT1(v[0], v[1]) + } else { + panic("instruction PFRSQIT1 takes exactly 2 operands") + } +} + +func __asm_proxy_PFRSQRT__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFRSQRT(v[0], v[1]) + } else { + panic("instruction PFRSQRT takes exactly 2 operands") + } +} + +func __asm_proxy_PFSUB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFSUB(v[0], v[1]) + } else { + panic("instruction PFSUB takes exactly 2 operands") + } +} + +func __asm_proxy_PFSUBR__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PFSUBR(v[0], v[1]) + } else { + panic("instruction PFSUBR takes exactly 2 operands") + } +} + +func __asm_proxy_PHADDD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PHADDD(v[0], v[1]) + } else { + panic("instruction PHADDD takes exactly 2 operands") + } +} + +func __asm_proxy_PHADDSW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PHADDSW(v[0], v[1]) + } else { + panic("instruction PHADDSW takes exactly 2 operands") + } +} + +func __asm_proxy_PHADDW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PHADDW(v[0], v[1]) + } else { + panic("instruction PHADDW takes exactly 2 operands") + } +} + +func __asm_proxy_PHMINPOSUW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PHMINPOSUW(v[0], v[1]) + } else { + panic("instruction PHMINPOSUW takes exactly 2 operands") + } +} + +func __asm_proxy_PHSUBD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PHSUBD(v[0], v[1]) + } else { + panic("instruction PHSUBD takes exactly 2 operands") + } +} + +func __asm_proxy_PHSUBSW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PHSUBSW(v[0], v[1]) + } else { + panic("instruction PHSUBSW takes exactly 2 operands") + } +} + +func __asm_proxy_PHSUBW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.PHSUBW(v[0], v[1]) + } else { + panic("instruction PHSUBW takes exactly 2 operands") + } +} + 
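Every generated proxy in this file shares the one signature func(p *Program, v ...interface{}) *Instruction, which is what lets an assembler frontend resolve an instruction by mnemonic and enforce its operand count through a single table lookup. A minimal sketch of that dispatch pattern follows; the table name _Mnemonics and the helper assembleByName are hypothetical illustrations, not part of the vendored file:

    // _AsmProxy is the shared shape of every __asm_proxy_*__ function above.
    type _AsmProxy func(p *Program, v ...interface{}) *Instruction

    // _Mnemonics is a hypothetical mnemonic-to-proxy table; the vendored file
    // wires proxies like these into its own lookup structure.
    var _Mnemonics = map[string]_AsmProxy{
        "PADDB":  __asm_proxy_PADDB__,
        "PHSUBW": __asm_proxy_PHSUBW__,
    }

    // assembleByName dispatches one instruction by name; the proxy itself
    // panics if the operand count is wrong for that mnemonic.
    func assembleByName(p *Program, name string, args ...interface{}) *Instruction {
        if proxy, ok := _Mnemonics[name]; ok {
            return proxy(p, args...)
        }
        panic("unknown instruction: " + name)
    }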
+func __asm_proxy_PI2FD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PI2FD(v[0], v[1])
+    } else {
+        panic("instruction PI2FD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PI2FW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PI2FW(v[0], v[1])
+    } else {
+        panic("instruction PI2FW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PINSRB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.PINSRB(v[0], v[1], v[2])
+    } else {
+        panic("instruction PINSRB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_PINSRD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.PINSRD(v[0], v[1], v[2])
+    } else {
+        panic("instruction PINSRD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_PINSRQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.PINSRQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction PINSRQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_PINSRW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.PINSRW(v[0], v[1], v[2])
+    } else {
+        panic("instruction PINSRW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_PMADDUBSW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMADDUBSW(v[0], v[1])
+    } else {
+        panic("instruction PMADDUBSW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMADDWD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMADDWD(v[0], v[1])
+    } else {
+        panic("instruction PMADDWD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMAXSB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMAXSB(v[0], v[1])
+    } else {
+        panic("instruction PMAXSB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMAXSD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMAXSD(v[0], v[1])
+    } else {
+        panic("instruction PMAXSD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMAXSW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMAXSW(v[0], v[1])
+    } else {
+        panic("instruction PMAXSW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMAXUB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMAXUB(v[0], v[1])
+    } else {
+        panic("instruction PMAXUB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMAXUD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMAXUD(v[0], v[1])
+    } else {
+        panic("instruction PMAXUD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMAXUW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMAXUW(v[0], v[1])
+    } else {
+        panic("instruction PMAXUW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMINSB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMINSB(v[0], v[1])
+    } else {
+        panic("instruction PMINSB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMINSD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMINSD(v[0], v[1])
+    } else {
+        panic("instruction PMINSD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMINSW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMINSW(v[0], v[1])
+    } else {
+        panic("instruction PMINSW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMINUB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMINUB(v[0], v[1])
+    } else {
+        panic("instruction PMINUB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMINUD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMINUD(v[0], v[1])
+    } else {
+        panic("instruction PMINUD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMINUW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMINUW(v[0], v[1])
+    } else {
+        panic("instruction PMINUW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMOVMSKB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMOVMSKB(v[0], v[1])
+    } else {
+        panic("instruction PMOVMSKB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMOVSXBD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMOVSXBD(v[0], v[1])
+    } else {
+        panic("instruction PMOVSXBD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMOVSXBQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMOVSXBQ(v[0], v[1])
+    } else {
+        panic("instruction PMOVSXBQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMOVSXBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMOVSXBW(v[0], v[1])
+    } else {
+        panic("instruction PMOVSXBW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMOVSXDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMOVSXDQ(v[0], v[1])
+    } else {
+        panic("instruction PMOVSXDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMOVSXWD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMOVSXWD(v[0], v[1])
+    } else {
+        panic("instruction PMOVSXWD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMOVSXWQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMOVSXWQ(v[0], v[1])
+    } else {
+        panic("instruction PMOVSXWQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMOVZXBD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMOVZXBD(v[0], v[1])
+    } else {
+        panic("instruction PMOVZXBD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMOVZXBQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMOVZXBQ(v[0], v[1])
+    } else {
+        panic("instruction PMOVZXBQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMOVZXBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMOVZXBW(v[0], v[1])
+    } else {
+        panic("instruction PMOVZXBW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMOVZXDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMOVZXDQ(v[0], v[1])
+    } else {
+        panic("instruction PMOVZXDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMOVZXWD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMOVZXWD(v[0], v[1])
+    } else {
+        panic("instruction PMOVZXWD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMOVZXWQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMOVZXWQ(v[0], v[1])
+    } else {
+        panic("instruction PMOVZXWQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMULDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMULDQ(v[0], v[1])
+    } else {
+        panic("instruction PMULDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMULHRSW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMULHRSW(v[0], v[1])
+    } else {
+        panic("instruction PMULHRSW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMULHRW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMULHRW(v[0], v[1])
+    } else {
+        panic("instruction PMULHRW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMULHUW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMULHUW(v[0], v[1])
+    } else {
+        panic("instruction PMULHUW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMULHW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMULHW(v[0], v[1])
+    } else {
+        panic("instruction PMULHW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMULLD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMULLD(v[0], v[1])
+    } else {
+        panic("instruction PMULLD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMULLW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMULLW(v[0], v[1])
+    } else {
+        panic("instruction PMULLW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PMULUDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PMULUDQ(v[0], v[1])
+    } else {
+        panic("instruction PMULUDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_POPCNTL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.POPCNTL(v[0], v[1])
+    } else {
+        panic("instruction POPCNTL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_POPCNTQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.POPCNTQ(v[0], v[1])
+    } else {
+        panic("instruction POPCNTQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_POPCNTW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.POPCNTW(v[0], v[1])
+    } else {
+        panic("instruction POPCNTW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_POPQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.POPQ(v[0])
+    } else {
+        panic("instruction POPQ takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_POPW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.POPW(v[0])
+    } else {
+        panic("instruction POPW takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_POR__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.POR(v[0], v[1])
+    } else {
+        panic("instruction POR takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PREFETCH__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.PREFETCH(v[0])
+    } else {
+        panic("instruction PREFETCH takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_PREFETCHNTA__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.PREFETCHNTA(v[0])
+    } else {
+        panic("instruction PREFETCHNTA takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_PREFETCHT0__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.PREFETCHT0(v[0])
+    } else {
+        panic("instruction PREFETCHT0 takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_PREFETCHT1__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.PREFETCHT1(v[0])
+    } else {
+        panic("instruction PREFETCHT1 takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_PREFETCHT2__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.PREFETCHT2(v[0])
+    } else {
+        panic("instruction PREFETCHT2 takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_PREFETCHW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.PREFETCHW(v[0])
+    } else {
+        panic("instruction PREFETCHW takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_PREFETCHWT1__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.PREFETCHWT1(v[0])
+    } else {
+        panic("instruction PREFETCHWT1 takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_PSADBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSADBW(v[0], v[1])
+    } else {
+        panic("instruction PSADBW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSHUFB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSHUFB(v[0], v[1])
+    } else {
+        panic("instruction PSHUFB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSHUFD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.PSHUFD(v[0], v[1], v[2])
+    } else {
+        panic("instruction PSHUFD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_PSHUFHW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.PSHUFHW(v[0], v[1], v[2])
+    } else {
+        panic("instruction PSHUFHW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_PSHUFLW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.PSHUFLW(v[0], v[1], v[2])
+    } else {
+        panic("instruction PSHUFLW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_PSHUFW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.PSHUFW(v[0], v[1], v[2])
+    } else {
+        panic("instruction PSHUFW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_PSIGNB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSIGNB(v[0], v[1])
+    } else {
+        panic("instruction PSIGNB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSIGND__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSIGND(v[0], v[1])
+    } else {
+        panic("instruction PSIGND takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSIGNW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSIGNW(v[0], v[1])
+    } else {
+        panic("instruction PSIGNW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSLLD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSLLD(v[0], v[1])
+    } else {
+        panic("instruction PSLLD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSLLDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSLLDQ(v[0], v[1])
+    } else {
+        panic("instruction PSLLDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSLLQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSLLQ(v[0], v[1])
+    } else {
+        panic("instruction PSLLQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSLLW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSLLW(v[0], v[1])
+    } else {
+        panic("instruction PSLLW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSRAD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSRAD(v[0], v[1])
+    } else {
+        panic("instruction PSRAD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSRAW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSRAW(v[0], v[1])
+    } else {
+        panic("instruction PSRAW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSRLD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSRLD(v[0], v[1])
+    } else {
+        panic("instruction PSRLD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSRLDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSRLDQ(v[0], v[1])
+    } else {
+        panic("instruction PSRLDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSRLQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSRLQ(v[0], v[1])
+    } else {
+        panic("instruction PSRLQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSRLW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSRLW(v[0], v[1])
+    } else {
+        panic("instruction PSRLW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSUBB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSUBB(v[0], v[1])
+    } else {
+        panic("instruction PSUBB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSUBD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSUBD(v[0], v[1])
+    } else {
+        panic("instruction PSUBD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSUBQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSUBQ(v[0], v[1])
+    } else {
+        panic("instruction PSUBQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSUBSB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSUBSB(v[0], v[1])
+    } else {
+        panic("instruction PSUBSB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSUBSW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSUBSW(v[0], v[1])
+    } else {
+        panic("instruction PSUBSW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSUBUSB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSUBUSB(v[0], v[1])
+    } else {
+        panic("instruction PSUBUSB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSUBUSW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSUBUSW(v[0], v[1])
+    } else {
+        panic("instruction PSUBUSW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSUBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSUBW(v[0], v[1])
+    } else {
+        panic("instruction PSUBW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PSWAPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PSWAPD(v[0], v[1])
+    } else {
+        panic("instruction PSWAPD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PTEST__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PTEST(v[0], v[1])
+    } else {
+        panic("instruction PTEST takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PUNPCKHBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PUNPCKHBW(v[0], v[1])
+    } else {
+        panic("instruction PUNPCKHBW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PUNPCKHDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PUNPCKHDQ(v[0], v[1])
+    } else {
+        panic("instruction PUNPCKHDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PUNPCKHQDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PUNPCKHQDQ(v[0], v[1])
+    } else {
+        panic("instruction PUNPCKHQDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PUNPCKHWD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PUNPCKHWD(v[0], v[1])
+    } else {
+        panic("instruction PUNPCKHWD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PUNPCKLBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PUNPCKLBW(v[0], v[1])
+    } else {
+        panic("instruction PUNPCKLBW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PUNPCKLDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PUNPCKLDQ(v[0], v[1])
+    } else {
+        panic("instruction PUNPCKLDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PUNPCKLQDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PUNPCKLQDQ(v[0], v[1])
+    } else {
+        panic("instruction PUNPCKLQDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PUNPCKLWD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PUNPCKLWD(v[0], v[1])
+    } else {
+        panic("instruction PUNPCKLWD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_PUSHQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.PUSHQ(v[0])
+    } else {
+        panic("instruction PUSHQ takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_PUSHW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.PUSHW(v[0])
+    } else {
+        panic("instruction PUSHW takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_PXOR__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.PXOR(v[0], v[1])
+    } else {
+        panic("instruction PXOR takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RCLB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.RCLB(v[0], v[1])
+    } else {
+        panic("instruction RCLB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RCLL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.RCLL(v[0], v[1])
+    } else {
+        panic("instruction RCLL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RCLQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.RCLQ(v[0], v[1])
+    } else {
+        panic("instruction RCLQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RCLW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.RCLW(v[0], v[1])
+    } else {
+        panic("instruction RCLW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RCPPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.RCPPS(v[0], v[1])
+    } else {
+        panic("instruction RCPPS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RCPSS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.RCPSS(v[0], v[1])
+    } else {
+        panic("instruction RCPSS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RCRB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.RCRB(v[0], v[1])
+    } else {
+        panic("instruction RCRB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RCRL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.RCRL(v[0], v[1])
+    } else {
+        panic("instruction RCRL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RCRQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.RCRQ(v[0], v[1])
+    } else {
+        panic("instruction RCRQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RCRW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.RCRW(v[0], v[1])
+    } else {
+        panic("instruction RCRW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RDRAND__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.RDRAND(v[0])
+    } else {
+        panic("instruction RDRAND takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_RDSEED__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.RDSEED(v[0])
+    } else {
+        panic("instruction RDSEED takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_RDTSC__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.RDTSC()
+    } else {
+        panic("instruction RDTSC takes no operands")
+    }
+}
+
+func __asm_proxy_RDTSCP__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.RDTSCP()
+    } else {
+        panic("instruction RDTSCP takes no operands")
+    }
+}
+
+func __asm_proxy_RET__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 0  : return p.RET()
+    case 1  : return p.RET(v[0])
+    default : panic("instruction RET takes 0 or 1 operands")
+    }
+}
+
+func __asm_proxy_ROLB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.ROLB(v[0], v[1])
+    } else {
+        panic("instruction ROLB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_ROLL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.ROLL(v[0], v[1])
+    } else {
+        panic("instruction ROLL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_ROLQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.ROLQ(v[0], v[1])
+    } else {
+        panic("instruction ROLQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_ROLW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.ROLW(v[0], v[1])
+    } else {
+        panic("instruction ROLW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RORB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.RORB(v[0], v[1])
+    } else {
+        panic("instruction RORB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RORL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.RORL(v[0], v[1])
+    } else {
+        panic("instruction RORL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RORQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.RORQ(v[0], v[1])
+    } else {
+        panic("instruction RORQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RORW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.RORW(v[0], v[1])
+    } else {
+        panic("instruction RORW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RORXL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.RORXL(v[0], v[1], v[2])
+    } else {
+        panic("instruction RORXL takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_RORXQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.RORXQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction RORXQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_ROUNDPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.ROUNDPD(v[0], v[1], v[2])
+    } else {
+        panic("instruction ROUNDPD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_ROUNDPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.ROUNDPS(v[0], v[1], v[2])
+    } else {
+        panic("instruction ROUNDPS takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_ROUNDSD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.ROUNDSD(v[0], v[1], v[2])
+    } else {
+        panic("instruction ROUNDSD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_ROUNDSS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.ROUNDSS(v[0], v[1], v[2])
+    } else {
+        panic("instruction ROUNDSS takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_RSQRTPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.RSQRTPS(v[0], v[1])
+    } else {
+        panic("instruction RSQRTPS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_RSQRTSS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.RSQRTSS(v[0], v[1])
+    } else {
+        panic("instruction RSQRTSS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SALB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SALB(v[0], v[1])
+    } else {
+        panic("instruction SALB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SALL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SALL(v[0], v[1])
+    } else {
+        panic("instruction SALL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SALQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SALQ(v[0], v[1])
+    } else {
+        panic("instruction SALQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SALW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SALW(v[0], v[1])
+    } else {
+        panic("instruction SALW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SARB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SARB(v[0], v[1])
+    } else {
+        panic("instruction SARB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SARL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SARL(v[0], v[1])
+    } else {
+        panic("instruction SARL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SARQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SARQ(v[0], v[1])
+    } else {
+        panic("instruction SARQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SARW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SARW(v[0], v[1])
+    } else {
+        panic("instruction SARW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SARXL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.SARXL(v[0], v[1], v[2])
+    } else {
+        panic("instruction SARXL takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_SARXQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.SARXQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction SARXQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_SBBB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SBBB(v[0], v[1])
+    } else {
+        panic("instruction SBBB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SBBL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SBBL(v[0], v[1])
+    } else {
+        panic("instruction SBBL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SBBQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SBBQ(v[0], v[1])
+    } else {
+        panic("instruction SBBQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SBBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SBBW(v[0], v[1])
+    } else {
+        panic("instruction SBBW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SETA__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETA(v[0])
+    } else {
+        panic("instruction SETA takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETAE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETAE(v[0])
+    } else {
+        panic("instruction SETAE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETB(v[0])
+    } else {
+        panic("instruction SETB takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETBE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETBE(v[0])
+    } else {
+        panic("instruction SETBE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETC__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETC(v[0])
+    } else {
+        panic("instruction SETC takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETE(v[0])
+    } else {
+        panic("instruction SETE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETG__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETG(v[0])
+    } else {
+        panic("instruction SETG takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETGE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETGE(v[0])
+    } else {
+        panic("instruction SETGE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETL(v[0])
+    } else {
+        panic("instruction SETL takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETLE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETLE(v[0])
+    } else {
+        panic("instruction SETLE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETNA__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETNA(v[0])
+    } else {
+        panic("instruction SETNA takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETNAE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETNAE(v[0])
+    } else {
+        panic("instruction SETNAE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETNB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETNB(v[0])
+    } else {
+        panic("instruction SETNB takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETNBE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETNBE(v[0])
+    } else {
+        panic("instruction SETNBE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETNC__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETNC(v[0])
+    } else {
+        panic("instruction SETNC takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETNE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETNE(v[0])
+    } else {
+        panic("instruction SETNE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETNG__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETNG(v[0])
+    } else {
+        panic("instruction SETNG takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETNGE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETNGE(v[0])
+    } else {
+        panic("instruction SETNGE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETNL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETNL(v[0])
+    } else {
+        panic("instruction SETNL takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETNLE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETNLE(v[0])
+    } else {
+        panic("instruction SETNLE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETNO__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETNO(v[0])
+    } else {
+        panic("instruction SETNO takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETNP__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETNP(v[0])
+    } else {
+        panic("instruction SETNP takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETNS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETNS(v[0])
+    } else {
+        panic("instruction SETNS takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETNZ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETNZ(v[0])
+    } else {
+        panic("instruction SETNZ takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETO__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETO(v[0])
+    } else {
+        panic("instruction SETO takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETP__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETP(v[0])
+    } else {
+        panic("instruction SETP takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETPE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETPE(v[0])
+    } else {
+        panic("instruction SETPE takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETPO__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETPO(v[0])
+    } else {
+        panic("instruction SETPO takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETS(v[0])
+    } else {
+        panic("instruction SETS takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SETZ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.SETZ(v[0])
+    } else {
+        panic("instruction SETZ takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SFENCE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.SFENCE()
+    } else {
+        panic("instruction SFENCE takes no operands")
+    }
+}
+
+func __asm_proxy_SHA1MSG1__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SHA1MSG1(v[0], v[1])
+    } else {
+        panic("instruction SHA1MSG1 takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SHA1MSG2__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SHA1MSG2(v[0], v[1])
+    } else {
+        panic("instruction SHA1MSG2 takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SHA1NEXTE__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SHA1NEXTE(v[0], v[1])
+    } else {
+        panic("instruction SHA1NEXTE takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SHA1RNDS4__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.SHA1RNDS4(v[0], v[1], v[2])
+    } else {
+        panic("instruction SHA1RNDS4 takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_SHA256MSG1__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SHA256MSG1(v[0], v[1])
+    } else {
+        panic("instruction SHA256MSG1 takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SHA256MSG2__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SHA256MSG2(v[0], v[1])
+    } else {
+        panic("instruction SHA256MSG2 takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SHA256RNDS2__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.SHA256RNDS2(v[0], v[1], v[2])
+    } else {
+        panic("instruction SHA256RNDS2 takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_SHLB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SHLB(v[0], v[1])
+    } else {
+        panic("instruction SHLB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SHLDL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.SHLDL(v[0], v[1], v[2])
+    } else {
+        panic("instruction SHLDL takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_SHLDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.SHLDQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction SHLDQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_SHLDW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.SHLDW(v[0], v[1], v[2])
+    } else {
+        panic("instruction SHLDW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_SHLL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SHLL(v[0], v[1])
+    } else {
+        panic("instruction SHLL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SHLQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SHLQ(v[0], v[1])
+    } else {
+        panic("instruction SHLQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SHLW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SHLW(v[0], v[1])
+    } else {
+        panic("instruction SHLW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SHLXL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.SHLXL(v[0], v[1], v[2])
+    } else {
+        panic("instruction SHLXL takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_SHLXQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.SHLXQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction SHLXQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_SHRB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SHRB(v[0], v[1])
+    } else {
+        panic("instruction SHRB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SHRDL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.SHRDL(v[0], v[1], v[2])
+    } else {
+        panic("instruction SHRDL takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_SHRDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.SHRDQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction SHRDQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_SHRDW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.SHRDW(v[0], v[1], v[2])
+    } else {
+        panic("instruction SHRDW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_SHRL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SHRL(v[0], v[1])
+    } else {
+        panic("instruction SHRL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SHRQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SHRQ(v[0], v[1])
+    } else {
+        panic("instruction SHRQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SHRW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SHRW(v[0], v[1])
+    } else {
+        panic("instruction SHRW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SHRXL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.SHRXL(v[0], v[1], v[2])
+    } else {
+        panic("instruction SHRXL takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_SHRXQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.SHRXQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction SHRXQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_SHUFPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.SHUFPD(v[0], v[1], v[2])
+    } else {
+        panic("instruction SHUFPD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_SHUFPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.SHUFPS(v[0], v[1], v[2])
+    } else {
+        panic("instruction SHUFPS takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_SQRTPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SQRTPD(v[0], v[1])
+    } else {
+        panic("instruction SQRTPD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SQRTPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SQRTPS(v[0], v[1])
+    } else {
+        panic("instruction SQRTPS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SQRTSD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SQRTSD(v[0], v[1])
+    } else {
+        panic("instruction SQRTSD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SQRTSS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SQRTSS(v[0], v[1])
+    } else {
+        panic("instruction SQRTSS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_STC__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.STC()
+    } else {
+        panic("instruction STC takes no operands")
+    }
+}
+
+func __asm_proxy_STD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.STD()
+    } else {
+        panic("instruction STD takes no operands")
+    }
+}
+
+func __asm_proxy_STMXCSR__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 1 {
+        return p.STMXCSR(v[0])
+    } else {
+        panic("instruction STMXCSR takes exactly 1 operand")
+    }
+}
+
+func __asm_proxy_SUBB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SUBB(v[0], v[1])
+    } else {
+        panic("instruction SUBB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SUBL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SUBL(v[0], v[1])
+    } else {
+        panic("instruction SUBL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SUBPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SUBPD(v[0], v[1])
+    } else {
+        panic("instruction SUBPD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SUBPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SUBPS(v[0], v[1])
+    } else {
+        panic("instruction SUBPS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SUBQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SUBQ(v[0], v[1])
+    } else {
+        panic("instruction SUBQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SUBSD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SUBSD(v[0], v[1])
+    } else {
+        panic("instruction SUBSD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SUBSS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SUBSS(v[0], v[1])
+    } else {
+        panic("instruction SUBSS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SUBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.SUBW(v[0], v[1])
+    } else {
+        panic("instruction SUBW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_SYSCALL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.SYSCALL()
+    } else {
+        panic("instruction SYSCALL takes no operands")
+    }
+}
+
+func __asm_proxy_T1MSKC__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.T1MSKC(v[0], v[1])
+    } else {
+        panic("instruction T1MSKC takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_TESTB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.TESTB(v[0], v[1])
+    } else {
+        panic("instruction TESTB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_TESTL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.TESTL(v[0], v[1])
+    } else {
+        panic("instruction TESTL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_TESTQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.TESTQ(v[0], v[1])
+    } else {
+        panic("instruction TESTQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_TESTW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.TESTW(v[0], v[1])
+    } else {
+        panic("instruction TESTW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_TZCNTL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.TZCNTL(v[0], v[1])
+    } else {
+        panic("instruction TZCNTL takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_TZCNTQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.TZCNTQ(v[0], v[1])
+    } else {
+        panic("instruction TZCNTQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_TZCNTW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.TZCNTW(v[0], v[1])
+    } else {
+        panic("instruction TZCNTW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_TZMSK__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.TZMSK(v[0], v[1])
+    } else {
+        panic("instruction TZMSK takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_UCOMISD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.UCOMISD(v[0], v[1])
+    } else {
+        panic("instruction UCOMISD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_UCOMISS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.UCOMISS(v[0], v[1])
+    } else {
+        panic("instruction UCOMISS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_UD2__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 0 {
+        return p.UD2()
+    } else {
+        panic("instruction UD2 takes no operands")
+    }
+}
+
+func __asm_proxy_UNPCKHPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.UNPCKHPD(v[0], v[1])
+    } else {
+        panic("instruction UNPCKHPD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_UNPCKHPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.UNPCKHPS(v[0], v[1])
+    } else {
+        panic("instruction UNPCKHPS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_UNPCKLPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.UNPCKLPD(v[0], v[1])
+    } else {
+        panic("instruction UNPCKLPD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_UNPCKLPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.UNPCKLPS(v[0], v[1])
+    } else {
+        panic("instruction UNPCKLPS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VADDPD__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 3  : return p.VADDPD(v[0], v[1], v[2])
+    case 4  : return p.VADDPD(v[0], v[1], v[2], v[3])
+    default : panic("instruction VADDPD takes 3 or 4 operands")
+    }
+}
+
+func __asm_proxy_VADDPS__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 3  : return p.VADDPS(v[0], v[1], v[2])
+    case 4  : return p.VADDPS(v[0], v[1], v[2], v[3])
+    default : panic("instruction VADDPS takes 3 or 4 operands")
+    }
+}
+
+func __asm_proxy_VADDSD__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 3  : return p.VADDSD(v[0], v[1], v[2])
+    case 4  : return p.VADDSD(v[0], v[1], v[2], v[3])
+    default : panic("instruction VADDSD takes 3 or 4 operands")
+    }
+}
+
+func __asm_proxy_VADDSS__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 3  : return p.VADDSS(v[0], v[1], v[2])
+    case 4  : return p.VADDSS(v[0], v[1], v[2], v[3])
+    default : panic("instruction VADDSS takes 3 or 4 operands")
+    }
+}
+
+func __asm_proxy_VADDSUBPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VADDSUBPD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VADDSUBPD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VADDSUBPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VADDSUBPS(v[0], v[1], v[2])
+    } else {
+        panic("instruction VADDSUBPS takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VAESDEC__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VAESDEC(v[0], v[1], v[2])
+    } else {
+        panic("instruction VAESDEC takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VAESDECLAST__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VAESDECLAST(v[0], v[1], v[2])
+    } else {
+        panic("instruction VAESDECLAST takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VAESENC__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VAESENC(v[0], v[1], v[2])
+    } else {
+        panic("instruction VAESENC takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VAESENCLAST__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VAESENCLAST(v[0], v[1], v[2])
+    } else {
+        panic("instruction VAESENCLAST takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VAESIMC__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VAESIMC(v[0], v[1])
+    } else {
+        panic("instruction VAESIMC takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VAESKEYGENASSIST__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VAESKEYGENASSIST(v[0], v[1], v[2])
+    } else {
+        panic("instruction VAESKEYGENASSIST takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VALIGND__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VALIGND(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VALIGND takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VALIGNQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VALIGNQ(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VALIGNQ takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VANDNPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VANDNPD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VANDNPD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VANDNPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VANDNPS(v[0], v[1], v[2])
+    } else {
+        panic("instruction VANDNPS takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VANDPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VANDPD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VANDPD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VANDPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VANDPS(v[0], v[1], v[2])
+    } else {
+        panic("instruction VANDPS takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VBLENDMPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VBLENDMPD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VBLENDMPD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VBLENDMPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VBLENDMPS(v[0], v[1], v[2])
+    } else {
+        panic("instruction VBLENDMPS takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VBLENDPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VBLENDPD(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VBLENDPD takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VBLENDPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VBLENDPS(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VBLENDPS takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VBLENDVPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VBLENDVPD(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VBLENDVPD takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VBLENDVPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VBLENDVPS(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VBLENDVPS takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VBROADCASTF128__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VBROADCASTF128(v[0], v[1])
+    } else {
+        panic("instruction VBROADCASTF128 takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VBROADCASTF32X2__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VBROADCASTF32X2(v[0], v[1])
+    } else {
+        panic("instruction VBROADCASTF32X2 takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VBROADCASTF32X4__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VBROADCASTF32X4(v[0], v[1])
+    } else {
+        panic("instruction VBROADCASTF32X4 takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VBROADCASTF32X8__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VBROADCASTF32X8(v[0], v[1])
+    } else {
+        panic("instruction VBROADCASTF32X8 takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VBROADCASTF64X2__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VBROADCASTF64X2(v[0], v[1])
+    } else {
+        panic("instruction VBROADCASTF64X2 takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VBROADCASTF64X4__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VBROADCASTF64X4(v[0], v[1])
+    } else {
+        panic("instruction VBROADCASTF64X4 takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VBROADCASTI128__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VBROADCASTI128(v[0], v[1])
+    } else {
+        panic("instruction VBROADCASTI128 takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VBROADCASTI32X2__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VBROADCASTI32X2(v[0], v[1])
+    } else {
+        panic("instruction VBROADCASTI32X2 takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VBROADCASTI32X4__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VBROADCASTI32X4(v[0], v[1])
+    } else {
+        panic("instruction VBROADCASTI32X4 takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VBROADCASTI32X8__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VBROADCASTI32X8(v[0], v[1])
+    } else {
+        panic("instruction VBROADCASTI32X8 takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VBROADCASTI64X2__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VBROADCASTI64X2(v[0], v[1])
+    } else {
+        panic("instruction VBROADCASTI64X2 takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VBROADCASTI64X4__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VBROADCASTI64X4(v[0], v[1])
+    } else {
+        panic("instruction VBROADCASTI64X4 takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VBROADCASTSD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VBROADCASTSD(v[0], v[1])
+    } else {
+        panic("instruction VBROADCASTSD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VBROADCASTSS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VBROADCASTSS(v[0], v[1])
+    } else {
+        panic("instruction VBROADCASTSS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VCMPPD__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 4  : return p.VCMPPD(v[0], v[1], v[2], v[3])
+    case 5  : return p.VCMPPD(v[0], v[1], v[2], v[3], v[4])
+    default : panic("instruction VCMPPD takes 4 or 5 operands")
+    }
+}
+
+func __asm_proxy_VCMPPS__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 4  : return p.VCMPPS(v[0], v[1], v[2], v[3])
+    case 5  : return p.VCMPPS(v[0], v[1], v[2], v[3], v[4])
+    default : panic("instruction VCMPPS takes 4 or 5 operands")
+    }
+}
+
+func __asm_proxy_VCMPSD__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 4  : return p.VCMPSD(v[0], v[1], v[2], v[3])
+    case 5  : return p.VCMPSD(v[0], v[1], v[2], v[3], v[4])
+    default : panic("instruction VCMPSD takes 4 or 5 operands")
+    }
+}
+
+func __asm_proxy_VCMPSS__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 4  : return p.VCMPSS(v[0], v[1], v[2], v[3])
+    case 5  : return p.VCMPSS(v[0], v[1], v[2], v[3], v[4])
+    default : panic("instruction VCMPSS takes 4 or 5 operands")
+    }
+}
+
+func __asm_proxy_VCOMISD__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 2  : return p.VCOMISD(v[0], v[1])
+    case 3  : return p.VCOMISD(v[0], v[1], v[2])
+    default : panic("instruction VCOMISD takes 2 or 3 operands")
+    }
+}
+
+func __asm_proxy_VCOMISS__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 2  : return p.VCOMISS(v[0], v[1])
+    case 3  : return p.VCOMISS(v[0], v[1], v[2])
+    default : panic("instruction VCOMISS takes 2 or 3 operands")
+    }
+}
+
+func __asm_proxy_VCOMPRESSPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VCOMPRESSPD(v[0], v[1])
+    } else {
+        panic("instruction VCOMPRESSPD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VCOMPRESSPS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VCOMPRESSPS(v[0], v[1])
+    } else {
+        panic("instruction VCOMPRESSPS takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VCVTDQ2PD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VCVTDQ2PD(v[0], v[1])
+    } else {
+        panic("instruction VCVTDQ2PD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VCVTDQ2PS__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 2  : return p.VCVTDQ2PS(v[0], v[1])
+    case 3  : return p.VCVTDQ2PS(v[0], v[1], v[2])
+    default : panic("instruction VCVTDQ2PS takes 2 or 3 operands")
+    }
+}
+
+func __asm_proxy_VCVTPD2DQ__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 2  : return p.VCVTPD2DQ(v[0], v[1])
+    case 3  : return p.VCVTPD2DQ(v[0], v[1], v[2])
+    default : panic("instruction VCVTPD2DQ takes 2 or 3 operands")
+    }
+}
+
+func __asm_proxy_VCVTPD2PS__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 2  : return p.VCVTPD2PS(v[0], v[1])
+    case 3  : return p.VCVTPD2PS(v[0], v[1], v[2])
+    default : panic("instruction VCVTPD2PS takes 2 or 3 operands")
+    }
+}
+
+func __asm_proxy_VCVTPD2QQ__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 2  : return p.VCVTPD2QQ(v[0], v[1])
+    case 3  : return p.VCVTPD2QQ(v[0], v[1], v[2])
+    default : panic("instruction VCVTPD2QQ takes 2 or 3 operands")
+    }
+}
+
+func __asm_proxy_VCVTPD2UDQ__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 2  : return p.VCVTPD2UDQ(v[0], v[1])
+    case 3  : return p.VCVTPD2UDQ(v[0], v[1], v[2])
+    default : panic("instruction VCVTPD2UDQ takes 2 or 3 operands")
+    }
+}
+
+func __asm_proxy_VCVTPD2UQQ__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 2  : return p.VCVTPD2UQQ(v[0], v[1])
+    case 3  : return p.VCVTPD2UQQ(v[0], v[1], v[2])
+    default : panic("instruction VCVTPD2UQQ takes 2 or 3 operands")
+    }
+}
+
+func __asm_proxy_VCVTPH2PS__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+    case 2  : return p.VCVTPH2PS(v[0], v[1])
+    case 3  :
return p.VCVTPH2PS(v[0], v[1], v[2]) + default : panic("instruction VCVTPH2PS takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTPS2DQ__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTPS2DQ(v[0], v[1]) + case 3 : return p.VCVTPS2DQ(v[0], v[1], v[2]) + default : panic("instruction VCVTPS2DQ takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTPS2PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTPS2PD(v[0], v[1]) + case 3 : return p.VCVTPS2PD(v[0], v[1], v[2]) + default : panic("instruction VCVTPS2PD takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTPS2PH__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VCVTPS2PH(v[0], v[1], v[2]) + case 4 : return p.VCVTPS2PH(v[0], v[1], v[2], v[3]) + default : panic("instruction VCVTPS2PH takes 3 or 4 operands") + } +} + +func __asm_proxy_VCVTPS2QQ__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTPS2QQ(v[0], v[1]) + case 3 : return p.VCVTPS2QQ(v[0], v[1], v[2]) + default : panic("instruction VCVTPS2QQ takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTPS2UDQ__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTPS2UDQ(v[0], v[1]) + case 3 : return p.VCVTPS2UDQ(v[0], v[1], v[2]) + default : panic("instruction VCVTPS2UDQ takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTPS2UQQ__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTPS2UQQ(v[0], v[1]) + case 3 : return p.VCVTPS2UQQ(v[0], v[1], v[2]) + default : panic("instruction VCVTPS2UQQ takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTQQ2PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTQQ2PD(v[0], v[1]) + case 3 : return p.VCVTQQ2PD(v[0], v[1], v[2]) + default : panic("instruction VCVTQQ2PD takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTQQ2PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTQQ2PS(v[0], v[1]) + case 3 : return p.VCVTQQ2PS(v[0], v[1], v[2]) + default : panic("instruction VCVTQQ2PS takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTSD2SI__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTSD2SI(v[0], v[1]) + case 3 : return p.VCVTSD2SI(v[0], v[1], v[2]) + default : panic("instruction VCVTSD2SI takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTSD2SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VCVTSD2SS(v[0], v[1], v[2]) + case 4 : return p.VCVTSD2SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VCVTSD2SS takes 3 or 4 operands") + } +} + +func __asm_proxy_VCVTSD2USI__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTSD2USI(v[0], v[1]) + case 3 : return p.VCVTSD2USI(v[0], v[1], v[2]) + default : panic("instruction VCVTSD2USI takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTSI2SD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VCVTSI2SD(v[0], v[1], v[2]) + case 4 : return p.VCVTSI2SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VCVTSI2SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VCVTSI2SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VCVTSI2SS(v[0], v[1], v[2]) + case 4 : return p.VCVTSI2SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VCVTSI2SS takes 3 or 4 operands") + } +} + +func __asm_proxy_VCVTSS2SD__(p *Program, 
v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VCVTSS2SD(v[0], v[1], v[2]) + case 4 : return p.VCVTSS2SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VCVTSS2SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VCVTSS2SI__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTSS2SI(v[0], v[1]) + case 3 : return p.VCVTSS2SI(v[0], v[1], v[2]) + default : panic("instruction VCVTSS2SI takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTSS2USI__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTSS2USI(v[0], v[1]) + case 3 : return p.VCVTSS2USI(v[0], v[1], v[2]) + default : panic("instruction VCVTSS2USI takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTTPD2DQ__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTTPD2DQ(v[0], v[1]) + case 3 : return p.VCVTTPD2DQ(v[0], v[1], v[2]) + default : panic("instruction VCVTTPD2DQ takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTTPD2QQ__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTTPD2QQ(v[0], v[1]) + case 3 : return p.VCVTTPD2QQ(v[0], v[1], v[2]) + default : panic("instruction VCVTTPD2QQ takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTTPD2UDQ__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTTPD2UDQ(v[0], v[1]) + case 3 : return p.VCVTTPD2UDQ(v[0], v[1], v[2]) + default : panic("instruction VCVTTPD2UDQ takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTTPD2UQQ__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTTPD2UQQ(v[0], v[1]) + case 3 : return p.VCVTTPD2UQQ(v[0], v[1], v[2]) + default : panic("instruction VCVTTPD2UQQ takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTTPS2DQ__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTTPS2DQ(v[0], v[1]) + case 3 : return p.VCVTTPS2DQ(v[0], v[1], v[2]) + default : panic("instruction VCVTTPS2DQ takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTTPS2QQ__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTTPS2QQ(v[0], v[1]) + case 3 : return p.VCVTTPS2QQ(v[0], v[1], v[2]) + default : panic("instruction VCVTTPS2QQ takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTTPS2UDQ__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTTPS2UDQ(v[0], v[1]) + case 3 : return p.VCVTTPS2UDQ(v[0], v[1], v[2]) + default : panic("instruction VCVTTPS2UDQ takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTTPS2UQQ__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTTPS2UQQ(v[0], v[1]) + case 3 : return p.VCVTTPS2UQQ(v[0], v[1], v[2]) + default : panic("instruction VCVTTPS2UQQ takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTTSD2SI__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTTSD2SI(v[0], v[1]) + case 3 : return p.VCVTTSD2SI(v[0], v[1], v[2]) + default : panic("instruction VCVTTSD2SI takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTTSD2USI__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTTSD2USI(v[0], v[1]) + case 3 : return p.VCVTTSD2USI(v[0], v[1], v[2]) + default : panic("instruction VCVTTSD2USI takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTTSS2SI__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTTSS2SI(v[0], v[1]) + case 3 : return p.VCVTTSS2SI(v[0], v[1], 
v[2]) + default : panic("instruction VCVTTSS2SI takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTTSS2USI__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTTSS2USI(v[0], v[1]) + case 3 : return p.VCVTTSS2USI(v[0], v[1], v[2]) + default : panic("instruction VCVTTSS2USI takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTUDQ2PD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VCVTUDQ2PD(v[0], v[1]) + } else { + panic("instruction VCVTUDQ2PD takes exactly 2 operands") + } +} + +func __asm_proxy_VCVTUDQ2PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTUDQ2PS(v[0], v[1]) + case 3 : return p.VCVTUDQ2PS(v[0], v[1], v[2]) + default : panic("instruction VCVTUDQ2PS takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTUQQ2PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTUQQ2PD(v[0], v[1]) + case 3 : return p.VCVTUQQ2PD(v[0], v[1], v[2]) + default : panic("instruction VCVTUQQ2PD takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTUQQ2PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VCVTUQQ2PS(v[0], v[1]) + case 3 : return p.VCVTUQQ2PS(v[0], v[1], v[2]) + default : panic("instruction VCVTUQQ2PS takes 2 or 3 operands") + } +} + +func __asm_proxy_VCVTUSI2SD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VCVTUSI2SD(v[0], v[1], v[2]) + case 4 : return p.VCVTUSI2SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VCVTUSI2SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VCVTUSI2SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VCVTUSI2SS(v[0], v[1], v[2]) + case 4 : return p.VCVTUSI2SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VCVTUSI2SS takes 3 or 4 operands") + } +} + +func __asm_proxy_VDBPSADBW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VDBPSADBW(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VDBPSADBW takes exactly 4 operands") + } +} + +func __asm_proxy_VDIVPD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VDIVPD(v[0], v[1], v[2]) + case 4 : return p.VDIVPD(v[0], v[1], v[2], v[3]) + default : panic("instruction VDIVPD takes 3 or 4 operands") + } +} + +func __asm_proxy_VDIVPS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VDIVPS(v[0], v[1], v[2]) + case 4 : return p.VDIVPS(v[0], v[1], v[2], v[3]) + default : panic("instruction VDIVPS takes 3 or 4 operands") + } +} + +func __asm_proxy_VDIVSD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VDIVSD(v[0], v[1], v[2]) + case 4 : return p.VDIVSD(v[0], v[1], v[2], v[3]) + default : panic("instruction VDIVSD takes 3 or 4 operands") + } +} + +func __asm_proxy_VDIVSS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VDIVSS(v[0], v[1], v[2]) + case 4 : return p.VDIVSS(v[0], v[1], v[2], v[3]) + default : panic("instruction VDIVSS takes 3 or 4 operands") + } +} + +func __asm_proxy_VDPPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VDPPD(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VDPPD takes exactly 4 operands") + } +} + +func __asm_proxy_VDPPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VDPPS(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VDPPS takes exactly 4 operands") + } +} + +func 
__asm_proxy_VEXP2PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VEXP2PD(v[0], v[1]) + case 3 : return p.VEXP2PD(v[0], v[1], v[2]) + default : panic("instruction VEXP2PD takes 2 or 3 operands") + } +} + +func __asm_proxy_VEXP2PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VEXP2PS(v[0], v[1]) + case 3 : return p.VEXP2PS(v[0], v[1], v[2]) + default : panic("instruction VEXP2PS takes 2 or 3 operands") + } +} + +func __asm_proxy_VEXPANDPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VEXPANDPD(v[0], v[1]) + } else { + panic("instruction VEXPANDPD takes exactly 2 operands") + } +} + +func __asm_proxy_VEXPANDPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VEXPANDPS(v[0], v[1]) + } else { + panic("instruction VEXPANDPS takes exactly 2 operands") + } +} + +func __asm_proxy_VEXTRACTF128__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VEXTRACTF128(v[0], v[1], v[2]) + } else { + panic("instruction VEXTRACTF128 takes exactly 3 operands") + } +} + +func __asm_proxy_VEXTRACTF32X4__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VEXTRACTF32X4(v[0], v[1], v[2]) + } else { + panic("instruction VEXTRACTF32X4 takes exactly 3 operands") + } +} + +func __asm_proxy_VEXTRACTF32X8__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VEXTRACTF32X8(v[0], v[1], v[2]) + } else { + panic("instruction VEXTRACTF32X8 takes exactly 3 operands") + } +} + +func __asm_proxy_VEXTRACTF64X2__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VEXTRACTF64X2(v[0], v[1], v[2]) + } else { + panic("instruction VEXTRACTF64X2 takes exactly 3 operands") + } +} + +func __asm_proxy_VEXTRACTF64X4__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VEXTRACTF64X4(v[0], v[1], v[2]) + } else { + panic("instruction VEXTRACTF64X4 takes exactly 3 operands") + } +} + +func __asm_proxy_VEXTRACTI128__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VEXTRACTI128(v[0], v[1], v[2]) + } else { + panic("instruction VEXTRACTI128 takes exactly 3 operands") + } +} + +func __asm_proxy_VEXTRACTI32X4__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VEXTRACTI32X4(v[0], v[1], v[2]) + } else { + panic("instruction VEXTRACTI32X4 takes exactly 3 operands") + } +} + +func __asm_proxy_VEXTRACTI32X8__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VEXTRACTI32X8(v[0], v[1], v[2]) + } else { + panic("instruction VEXTRACTI32X8 takes exactly 3 operands") + } +} + +func __asm_proxy_VEXTRACTI64X2__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VEXTRACTI64X2(v[0], v[1], v[2]) + } else { + panic("instruction VEXTRACTI64X2 takes exactly 3 operands") + } +} + +func __asm_proxy_VEXTRACTI64X4__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VEXTRACTI64X4(v[0], v[1], v[2]) + } else { + panic("instruction VEXTRACTI64X4 takes exactly 3 operands") + } +} + +func __asm_proxy_VEXTRACTPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VEXTRACTPS(v[0], v[1], v[2]) + } else { + panic("instruction VEXTRACTPS takes exactly 3 operands") + } +} + +func __asm_proxy_VFIXUPIMMPD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 4 : return p.VFIXUPIMMPD(v[0], v[1], v[2], v[3]) + case 5 : return p.VFIXUPIMMPD(v[0], v[1], v[2], v[3], v[4]) + 
default : panic("instruction VFIXUPIMMPD takes 4 or 5 operands") + } +} + +func __asm_proxy_VFIXUPIMMPS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 4 : return p.VFIXUPIMMPS(v[0], v[1], v[2], v[3]) + case 5 : return p.VFIXUPIMMPS(v[0], v[1], v[2], v[3], v[4]) + default : panic("instruction VFIXUPIMMPS takes 4 or 5 operands") + } +} + +func __asm_proxy_VFIXUPIMMSD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 4 : return p.VFIXUPIMMSD(v[0], v[1], v[2], v[3]) + case 5 : return p.VFIXUPIMMSD(v[0], v[1], v[2], v[3], v[4]) + default : panic("instruction VFIXUPIMMSD takes 4 or 5 operands") + } +} + +func __asm_proxy_VFIXUPIMMSS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 4 : return p.VFIXUPIMMSS(v[0], v[1], v[2], v[3]) + case 5 : return p.VFIXUPIMMSS(v[0], v[1], v[2], v[3], v[4]) + default : panic("instruction VFIXUPIMMSS takes 4 or 5 operands") + } +} + +func __asm_proxy_VFMADD132PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADD132PD(v[0], v[1], v[2]) + case 4 : return p.VFMADD132PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADD132PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADD132PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADD132PS(v[0], v[1], v[2]) + case 4 : return p.VFMADD132PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADD132PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADD132SD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADD132SD(v[0], v[1], v[2]) + case 4 : return p.VFMADD132SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADD132SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADD132SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADD132SS(v[0], v[1], v[2]) + case 4 : return p.VFMADD132SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADD132SS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADD213PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADD213PD(v[0], v[1], v[2]) + case 4 : return p.VFMADD213PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADD213PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADD213PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADD213PS(v[0], v[1], v[2]) + case 4 : return p.VFMADD213PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADD213PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADD213SD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADD213SD(v[0], v[1], v[2]) + case 4 : return p.VFMADD213SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADD213SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADD213SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADD213SS(v[0], v[1], v[2]) + case 4 : return p.VFMADD213SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADD213SS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADD231PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADD231PD(v[0], v[1], v[2]) + case 4 : return p.VFMADD231PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADD231PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADD231PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return 
p.VFMADD231PS(v[0], v[1], v[2]) + case 4 : return p.VFMADD231PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADD231PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADD231SD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADD231SD(v[0], v[1], v[2]) + case 4 : return p.VFMADD231SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADD231SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADD231SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADD231SS(v[0], v[1], v[2]) + case 4 : return p.VFMADD231SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADD231SS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADDPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFMADDPD(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFMADDPD takes exactly 4 operands") + } +} + +func __asm_proxy_VFMADDPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFMADDPS(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFMADDPS takes exactly 4 operands") + } +} + +func __asm_proxy_VFMADDSD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFMADDSD(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFMADDSD takes exactly 4 operands") + } +} + +func __asm_proxy_VFMADDSS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFMADDSS(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFMADDSS takes exactly 4 operands") + } +} + +func __asm_proxy_VFMADDSUB132PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADDSUB132PD(v[0], v[1], v[2]) + case 4 : return p.VFMADDSUB132PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADDSUB132PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADDSUB132PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADDSUB132PS(v[0], v[1], v[2]) + case 4 : return p.VFMADDSUB132PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADDSUB132PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADDSUB213PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADDSUB213PD(v[0], v[1], v[2]) + case 4 : return p.VFMADDSUB213PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADDSUB213PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADDSUB213PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADDSUB213PS(v[0], v[1], v[2]) + case 4 : return p.VFMADDSUB213PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADDSUB213PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADDSUB231PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADDSUB231PD(v[0], v[1], v[2]) + case 4 : return p.VFMADDSUB231PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADDSUB231PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADDSUB231PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMADDSUB231PS(v[0], v[1], v[2]) + case 4 : return p.VFMADDSUB231PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMADDSUB231PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMADDSUBPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFMADDSUBPD(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFMADDSUBPD takes exactly 4 operands") + } +} + +func 
__asm_proxy_VFMADDSUBPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFMADDSUBPS(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFMADDSUBPS takes exactly 4 operands") + } +} + +func __asm_proxy_VFMSUB132PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUB132PD(v[0], v[1], v[2]) + case 4 : return p.VFMSUB132PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUB132PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUB132PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUB132PS(v[0], v[1], v[2]) + case 4 : return p.VFMSUB132PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUB132PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUB132SD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUB132SD(v[0], v[1], v[2]) + case 4 : return p.VFMSUB132SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUB132SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUB132SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUB132SS(v[0], v[1], v[2]) + case 4 : return p.VFMSUB132SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUB132SS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUB213PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUB213PD(v[0], v[1], v[2]) + case 4 : return p.VFMSUB213PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUB213PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUB213PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUB213PS(v[0], v[1], v[2]) + case 4 : return p.VFMSUB213PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUB213PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUB213SD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUB213SD(v[0], v[1], v[2]) + case 4 : return p.VFMSUB213SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUB213SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUB213SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUB213SS(v[0], v[1], v[2]) + case 4 : return p.VFMSUB213SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUB213SS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUB231PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUB231PD(v[0], v[1], v[2]) + case 4 : return p.VFMSUB231PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUB231PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUB231PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUB231PS(v[0], v[1], v[2]) + case 4 : return p.VFMSUB231PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUB231PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUB231SD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUB231SD(v[0], v[1], v[2]) + case 4 : return p.VFMSUB231SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUB231SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUB231SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUB231SS(v[0], v[1], v[2]) + case 4 : return p.VFMSUB231SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUB231SS takes 3 or 4 operands") + } +} + +func 
__asm_proxy_VFMSUBADD132PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUBADD132PD(v[0], v[1], v[2]) + case 4 : return p.VFMSUBADD132PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUBADD132PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUBADD132PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUBADD132PS(v[0], v[1], v[2]) + case 4 : return p.VFMSUBADD132PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUBADD132PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUBADD213PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUBADD213PD(v[0], v[1], v[2]) + case 4 : return p.VFMSUBADD213PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUBADD213PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUBADD213PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUBADD213PS(v[0], v[1], v[2]) + case 4 : return p.VFMSUBADD213PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUBADD213PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUBADD231PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUBADD231PD(v[0], v[1], v[2]) + case 4 : return p.VFMSUBADD231PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUBADD231PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUBADD231PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFMSUBADD231PS(v[0], v[1], v[2]) + case 4 : return p.VFMSUBADD231PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFMSUBADD231PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFMSUBADDPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFMSUBADDPD(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFMSUBADDPD takes exactly 4 operands") + } +} + +func __asm_proxy_VFMSUBADDPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFMSUBADDPS(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFMSUBADDPS takes exactly 4 operands") + } +} + +func __asm_proxy_VFMSUBPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFMSUBPD(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFMSUBPD takes exactly 4 operands") + } +} + +func __asm_proxy_VFMSUBPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFMSUBPS(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFMSUBPS takes exactly 4 operands") + } +} + +func __asm_proxy_VFMSUBSD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFMSUBSD(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFMSUBSD takes exactly 4 operands") + } +} + +func __asm_proxy_VFMSUBSS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFMSUBSS(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFMSUBSS takes exactly 4 operands") + } +} + +func __asm_proxy_VFNMADD132PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMADD132PD(v[0], v[1], v[2]) + case 4 : return p.VFNMADD132PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMADD132PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMADD132PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMADD132PS(v[0], v[1], v[2]) + case 4 : return p.VFNMADD132PS(v[0], v[1], v[2], v[3]) + default : panic("instruction 
VFNMADD132PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMADD132SD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMADD132SD(v[0], v[1], v[2]) + case 4 : return p.VFNMADD132SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMADD132SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMADD132SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMADD132SS(v[0], v[1], v[2]) + case 4 : return p.VFNMADD132SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMADD132SS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMADD213PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMADD213PD(v[0], v[1], v[2]) + case 4 : return p.VFNMADD213PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMADD213PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMADD213PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMADD213PS(v[0], v[1], v[2]) + case 4 : return p.VFNMADD213PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMADD213PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMADD213SD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMADD213SD(v[0], v[1], v[2]) + case 4 : return p.VFNMADD213SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMADD213SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMADD213SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMADD213SS(v[0], v[1], v[2]) + case 4 : return p.VFNMADD213SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMADD213SS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMADD231PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMADD231PD(v[0], v[1], v[2]) + case 4 : return p.VFNMADD231PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMADD231PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMADD231PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMADD231PS(v[0], v[1], v[2]) + case 4 : return p.VFNMADD231PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMADD231PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMADD231SD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMADD231SD(v[0], v[1], v[2]) + case 4 : return p.VFNMADD231SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMADD231SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMADD231SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMADD231SS(v[0], v[1], v[2]) + case 4 : return p.VFNMADD231SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMADD231SS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMADDPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFNMADDPD(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFNMADDPD takes exactly 4 operands") + } +} + +func __asm_proxy_VFNMADDPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFNMADDPS(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFNMADDPS takes exactly 4 operands") + } +} + +func __asm_proxy_VFNMADDSD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFNMADDSD(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFNMADDSD takes exactly 4 operands") + } +} + +func __asm_proxy_VFNMADDSS__(p *Program, v 
...interface{}) *Instruction { + if len(v) == 4 { + return p.VFNMADDSS(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFNMADDSS takes exactly 4 operands") + } +} + +func __asm_proxy_VFNMSUB132PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMSUB132PD(v[0], v[1], v[2]) + case 4 : return p.VFNMSUB132PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMSUB132PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMSUB132PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMSUB132PS(v[0], v[1], v[2]) + case 4 : return p.VFNMSUB132PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMSUB132PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMSUB132SD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMSUB132SD(v[0], v[1], v[2]) + case 4 : return p.VFNMSUB132SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMSUB132SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMSUB132SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMSUB132SS(v[0], v[1], v[2]) + case 4 : return p.VFNMSUB132SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMSUB132SS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMSUB213PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMSUB213PD(v[0], v[1], v[2]) + case 4 : return p.VFNMSUB213PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMSUB213PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMSUB213PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMSUB213PS(v[0], v[1], v[2]) + case 4 : return p.VFNMSUB213PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMSUB213PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMSUB213SD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMSUB213SD(v[0], v[1], v[2]) + case 4 : return p.VFNMSUB213SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMSUB213SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMSUB213SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMSUB213SS(v[0], v[1], v[2]) + case 4 : return p.VFNMSUB213SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMSUB213SS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMSUB231PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMSUB231PD(v[0], v[1], v[2]) + case 4 : return p.VFNMSUB231PD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMSUB231PD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMSUB231PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMSUB231PS(v[0], v[1], v[2]) + case 4 : return p.VFNMSUB231PS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMSUB231PS takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMSUB231SD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMSUB231SD(v[0], v[1], v[2]) + case 4 : return p.VFNMSUB231SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMSUB231SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VFNMSUB231SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VFNMSUB231SS(v[0], v[1], v[2]) + case 4 : return p.VFNMSUB231SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VFNMSUB231SS takes 3 or 4 operands") + } +} + +func 
__asm_proxy_VFNMSUBPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFNMSUBPD(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFNMSUBPD takes exactly 4 operands") + } +} + +func __asm_proxy_VFNMSUBPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFNMSUBPS(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFNMSUBPS takes exactly 4 operands") + } +} + +func __asm_proxy_VFNMSUBSD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFNMSUBSD(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFNMSUBSD takes exactly 4 operands") + } +} + +func __asm_proxy_VFNMSUBSS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VFNMSUBSS(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VFNMSUBSS takes exactly 4 operands") + } +} + +func __asm_proxy_VFPCLASSPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VFPCLASSPD(v[0], v[1], v[2]) + } else { + panic("instruction VFPCLASSPD takes exactly 3 operands") + } +} + +func __asm_proxy_VFPCLASSPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VFPCLASSPS(v[0], v[1], v[2]) + } else { + panic("instruction VFPCLASSPS takes exactly 3 operands") + } +} + +func __asm_proxy_VFPCLASSSD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VFPCLASSSD(v[0], v[1], v[2]) + } else { + panic("instruction VFPCLASSSD takes exactly 3 operands") + } +} + +func __asm_proxy_VFPCLASSSS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VFPCLASSSS(v[0], v[1], v[2]) + } else { + panic("instruction VFPCLASSSS takes exactly 3 operands") + } +} + +func __asm_proxy_VFRCZPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VFRCZPD(v[0], v[1]) + } else { + panic("instruction VFRCZPD takes exactly 2 operands") + } +} + +func __asm_proxy_VFRCZPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VFRCZPS(v[0], v[1]) + } else { + panic("instruction VFRCZPS takes exactly 2 operands") + } +} + +func __asm_proxy_VFRCZSD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VFRCZSD(v[0], v[1]) + } else { + panic("instruction VFRCZSD takes exactly 2 operands") + } +} + +func __asm_proxy_VFRCZSS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VFRCZSS(v[0], v[1]) + } else { + panic("instruction VFRCZSS takes exactly 2 operands") + } +} + +func __asm_proxy_VGATHERDPD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VGATHERDPD(v[0], v[1]) + case 3 : return p.VGATHERDPD(v[0], v[1], v[2]) + default : panic("instruction VGATHERDPD takes 2 or 3 operands") + } +} + +func __asm_proxy_VGATHERDPS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VGATHERDPS(v[0], v[1]) + case 3 : return p.VGATHERDPS(v[0], v[1], v[2]) + default : panic("instruction VGATHERDPS takes 2 or 3 operands") + } +} + +func __asm_proxy_VGATHERPF0DPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.VGATHERPF0DPD(v[0]) + } else { + panic("instruction VGATHERPF0DPD takes exactly 1 operand") + } +} + +func __asm_proxy_VGATHERPF0DPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.VGATHERPF0DPS(v[0]) + } else { + panic("instruction VGATHERPF0DPS takes exactly 1 operand") + } +} + +func __asm_proxy_VGATHERPF0QPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { 
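+ // The AVX-512 prefetch-gather forms (VGATHERPF0*/VGATHERPF1*) take a single masked vector-memory operand, so these adapters accept exactly one value. +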
+ return p.VGATHERPF0QPD(v[0]) + } else { + panic("instruction VGATHERPF0QPD takes exactly 1 operand") + } +} + +func __asm_proxy_VGATHERPF0QPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.VGATHERPF0QPS(v[0]) + } else { + panic("instruction VGATHERPF0QPS takes exactly 1 operand") + } +} + +func __asm_proxy_VGATHERPF1DPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.VGATHERPF1DPD(v[0]) + } else { + panic("instruction VGATHERPF1DPD takes exactly 1 operand") + } +} + +func __asm_proxy_VGATHERPF1DPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.VGATHERPF1DPS(v[0]) + } else { + panic("instruction VGATHERPF1DPS takes exactly 1 operand") + } +} + +func __asm_proxy_VGATHERPF1QPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.VGATHERPF1QPD(v[0]) + } else { + panic("instruction VGATHERPF1QPD takes exactly 1 operand") + } +} + +func __asm_proxy_VGATHERPF1QPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.VGATHERPF1QPS(v[0]) + } else { + panic("instruction VGATHERPF1QPS takes exactly 1 operand") + } +} + +func __asm_proxy_VGATHERQPD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VGATHERQPD(v[0], v[1]) + case 3 : return p.VGATHERQPD(v[0], v[1], v[2]) + default : panic("instruction VGATHERQPD takes 2 or 3 operands") + } +} + +func __asm_proxy_VGATHERQPS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VGATHERQPS(v[0], v[1]) + case 3 : return p.VGATHERQPS(v[0], v[1], v[2]) + default : panic("instruction VGATHERQPS takes 2 or 3 operands") + } +} + +func __asm_proxy_VGETEXPPD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VGETEXPPD(v[0], v[1]) + case 3 : return p.VGETEXPPD(v[0], v[1], v[2]) + default : panic("instruction VGETEXPPD takes 2 or 3 operands") + } +} + +func __asm_proxy_VGETEXPPS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VGETEXPPS(v[0], v[1]) + case 3 : return p.VGETEXPPS(v[0], v[1], v[2]) + default : panic("instruction VGETEXPPS takes 2 or 3 operands") + } +} + +func __asm_proxy_VGETEXPSD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VGETEXPSD(v[0], v[1], v[2]) + case 4 : return p.VGETEXPSD(v[0], v[1], v[2], v[3]) + default : panic("instruction VGETEXPSD takes 3 or 4 operands") + } +} + +func __asm_proxy_VGETEXPSS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VGETEXPSS(v[0], v[1], v[2]) + case 4 : return p.VGETEXPSS(v[0], v[1], v[2], v[3]) + default : panic("instruction VGETEXPSS takes 3 or 4 operands") + } +} + +func __asm_proxy_VGETMANTPD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VGETMANTPD(v[0], v[1], v[2]) + case 4 : return p.VGETMANTPD(v[0], v[1], v[2], v[3]) + default : panic("instruction VGETMANTPD takes 3 or 4 operands") + } +} + +func __asm_proxy_VGETMANTPS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VGETMANTPS(v[0], v[1], v[2]) + case 4 : return p.VGETMANTPS(v[0], v[1], v[2], v[3]) + default : panic("instruction VGETMANTPS takes 3 or 4 operands") + } +} + +func __asm_proxy_VGETMANTSD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 4 : return p.VGETMANTSD(v[0], v[1], v[2], v[3]) + case 5 : return p.VGETMANTSD(v[0], v[1], v[2], v[3], v[4]) + default : panic("instruction VGETMANTSD takes 4 or 5 
operands") + } +} + +func __asm_proxy_VGETMANTSS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 4 : return p.VGETMANTSS(v[0], v[1], v[2], v[3]) + case 5 : return p.VGETMANTSS(v[0], v[1], v[2], v[3], v[4]) + default : panic("instruction VGETMANTSS takes 4 or 5 operands") + } +} + +func __asm_proxy_VHADDPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VHADDPD(v[0], v[1], v[2]) + } else { + panic("instruction VHADDPD takes exactly 3 operands") + } +} + +func __asm_proxy_VHADDPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VHADDPS(v[0], v[1], v[2]) + } else { + panic("instruction VHADDPS takes exactly 3 operands") + } +} + +func __asm_proxy_VHSUBPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VHSUBPD(v[0], v[1], v[2]) + } else { + panic("instruction VHSUBPD takes exactly 3 operands") + } +} + +func __asm_proxy_VHSUBPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VHSUBPS(v[0], v[1], v[2]) + } else { + panic("instruction VHSUBPS takes exactly 3 operands") + } +} + +func __asm_proxy_VINSERTF128__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VINSERTF128(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VINSERTF128 takes exactly 4 operands") + } +} + +func __asm_proxy_VINSERTF32X4__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VINSERTF32X4(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VINSERTF32X4 takes exactly 4 operands") + } +} + +func __asm_proxy_VINSERTF32X8__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VINSERTF32X8(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VINSERTF32X8 takes exactly 4 operands") + } +} + +func __asm_proxy_VINSERTF64X2__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VINSERTF64X2(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VINSERTF64X2 takes exactly 4 operands") + } +} + +func __asm_proxy_VINSERTF64X4__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VINSERTF64X4(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VINSERTF64X4 takes exactly 4 operands") + } +} + +func __asm_proxy_VINSERTI128__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VINSERTI128(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VINSERTI128 takes exactly 4 operands") + } +} + +func __asm_proxy_VINSERTI32X4__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VINSERTI32X4(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VINSERTI32X4 takes exactly 4 operands") + } +} + +func __asm_proxy_VINSERTI32X8__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VINSERTI32X8(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VINSERTI32X8 takes exactly 4 operands") + } +} + +func __asm_proxy_VINSERTI64X2__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VINSERTI64X2(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VINSERTI64X2 takes exactly 4 operands") + } +} + +func __asm_proxy_VINSERTI64X4__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VINSERTI64X4(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VINSERTI64X4 takes exactly 4 operands") + } +} + +func __asm_proxy_VINSERTPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VINSERTPS(v[0], v[1], v[2], v[3]) + } else { + panic("instruction 
VINSERTPS takes exactly 4 operands") + } +} + +func __asm_proxy_VLDDQU__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VLDDQU(v[0], v[1]) + } else { + panic("instruction VLDDQU takes exactly 2 operands") + } +} + +func __asm_proxy_VLDMXCSR__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.VLDMXCSR(v[0]) + } else { + panic("instruction VLDMXCSR takes exactly 1 operand") + } +} + +func __asm_proxy_VMASKMOVDQU__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMASKMOVDQU(v[0], v[1]) + } else { + panic("instruction VMASKMOVDQU takes exactly 2 operands") + } +} + +func __asm_proxy_VMASKMOVPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VMASKMOVPD(v[0], v[1], v[2]) + } else { + panic("instruction VMASKMOVPD takes exactly 3 operands") + } +} + +func __asm_proxy_VMASKMOVPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VMASKMOVPS(v[0], v[1], v[2]) + } else { + panic("instruction VMASKMOVPS takes exactly 3 operands") + } +} + +func __asm_proxy_VMAXPD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VMAXPD(v[0], v[1], v[2]) + case 4 : return p.VMAXPD(v[0], v[1], v[2], v[3]) + default : panic("instruction VMAXPD takes 3 or 4 operands") + } +} + +func __asm_proxy_VMAXPS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VMAXPS(v[0], v[1], v[2]) + case 4 : return p.VMAXPS(v[0], v[1], v[2], v[3]) + default : panic("instruction VMAXPS takes 3 or 4 operands") + } +} + +func __asm_proxy_VMAXSD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VMAXSD(v[0], v[1], v[2]) + case 4 : return p.VMAXSD(v[0], v[1], v[2], v[3]) + default : panic("instruction VMAXSD takes 3 or 4 operands") + } +} + +func __asm_proxy_VMAXSS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VMAXSS(v[0], v[1], v[2]) + case 4 : return p.VMAXSS(v[0], v[1], v[2], v[3]) + default : panic("instruction VMAXSS takes 3 or 4 operands") + } +} + +func __asm_proxy_VMINPD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VMINPD(v[0], v[1], v[2]) + case 4 : return p.VMINPD(v[0], v[1], v[2], v[3]) + default : panic("instruction VMINPD takes 3 or 4 operands") + } +} + +func __asm_proxy_VMINPS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VMINPS(v[0], v[1], v[2]) + case 4 : return p.VMINPS(v[0], v[1], v[2], v[3]) + default : panic("instruction VMINPS takes 3 or 4 operands") + } +} + +func __asm_proxy_VMINSD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VMINSD(v[0], v[1], v[2]) + case 4 : return p.VMINSD(v[0], v[1], v[2], v[3]) + default : panic("instruction VMINSD takes 3 or 4 operands") + } +} + +func __asm_proxy_VMINSS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VMINSS(v[0], v[1], v[2]) + case 4 : return p.VMINSS(v[0], v[1], v[2], v[3]) + default : panic("instruction VMINSS takes 3 or 4 operands") + } +} + +func __asm_proxy_VMOVAPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVAPD(v[0], v[1]) + } else { + panic("instruction VMOVAPD takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVAPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVAPS(v[0], v[1]) + } else { + panic("instruction VMOVAPS takes exactly 2 operands") + } +} + +func 
__asm_proxy_VMOVD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVD(v[0], v[1]) + } else { + panic("instruction VMOVD takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVDDUP__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVDDUP(v[0], v[1]) + } else { + panic("instruction VMOVDDUP takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVDQA__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVDQA(v[0], v[1]) + } else { + panic("instruction VMOVDQA takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVDQA32__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVDQA32(v[0], v[1]) + } else { + panic("instruction VMOVDQA32 takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVDQA64__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVDQA64(v[0], v[1]) + } else { + panic("instruction VMOVDQA64 takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVDQU__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVDQU(v[0], v[1]) + } else { + panic("instruction VMOVDQU takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVDQU16__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVDQU16(v[0], v[1]) + } else { + panic("instruction VMOVDQU16 takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVDQU32__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVDQU32(v[0], v[1]) + } else { + panic("instruction VMOVDQU32 takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVDQU64__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVDQU64(v[0], v[1]) + } else { + panic("instruction VMOVDQU64 takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVDQU8__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVDQU8(v[0], v[1]) + } else { + panic("instruction VMOVDQU8 takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVHLPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VMOVHLPS(v[0], v[1], v[2]) + } else { + panic("instruction VMOVHLPS takes exactly 3 operands") + } +} + +func __asm_proxy_VMOVHPD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VMOVHPD(v[0], v[1]) + case 3 : return p.VMOVHPD(v[0], v[1], v[2]) + default : panic("instruction VMOVHPD takes 2 or 3 operands") + } +} + +func __asm_proxy_VMOVHPS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VMOVHPS(v[0], v[1]) + case 3 : return p.VMOVHPS(v[0], v[1], v[2]) + default : panic("instruction VMOVHPS takes 2 or 3 operands") + } +} + +func __asm_proxy_VMOVLHPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VMOVLHPS(v[0], v[1], v[2]) + } else { + panic("instruction VMOVLHPS takes exactly 3 operands") + } +} + +func __asm_proxy_VMOVLPD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VMOVLPD(v[0], v[1]) + case 3 : return p.VMOVLPD(v[0], v[1], v[2]) + default : panic("instruction VMOVLPD takes 2 or 3 operands") + } +} + +func __asm_proxy_VMOVLPS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VMOVLPS(v[0], v[1]) + case 3 : return p.VMOVLPS(v[0], v[1], v[2]) + default : panic("instruction VMOVLPS takes 2 or 3 operands") + } +} + +func __asm_proxy_VMOVMSKPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVMSKPD(v[0], v[1]) + 
} else { + panic("instruction VMOVMSKPD takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVMSKPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVMSKPS(v[0], v[1]) + } else { + panic("instruction VMOVMSKPS takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVNTDQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVNTDQ(v[0], v[1]) + } else { + panic("instruction VMOVNTDQ takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVNTDQA__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVNTDQA(v[0], v[1]) + } else { + panic("instruction VMOVNTDQA takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVNTPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVNTPD(v[0], v[1]) + } else { + panic("instruction VMOVNTPD takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVNTPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVNTPS(v[0], v[1]) + } else { + panic("instruction VMOVNTPS takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVQ(v[0], v[1]) + } else { + panic("instruction VMOVQ takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVSD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VMOVSD(v[0], v[1]) + case 3 : return p.VMOVSD(v[0], v[1], v[2]) + default : panic("instruction VMOVSD takes 2 or 3 operands") + } +} + +func __asm_proxy_VMOVSHDUP__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVSHDUP(v[0], v[1]) + } else { + panic("instruction VMOVSHDUP takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVSLDUP__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVSLDUP(v[0], v[1]) + } else { + panic("instruction VMOVSLDUP takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVSS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VMOVSS(v[0], v[1]) + case 3 : return p.VMOVSS(v[0], v[1], v[2]) + default : panic("instruction VMOVSS takes 2 or 3 operands") + } +} + +func __asm_proxy_VMOVUPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVUPD(v[0], v[1]) + } else { + panic("instruction VMOVUPD takes exactly 2 operands") + } +} + +func __asm_proxy_VMOVUPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VMOVUPS(v[0], v[1]) + } else { + panic("instruction VMOVUPS takes exactly 2 operands") + } +} + +func __asm_proxy_VMPSADBW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VMPSADBW(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VMPSADBW takes exactly 4 operands") + } +} + +func __asm_proxy_VMULPD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VMULPD(v[0], v[1], v[2]) + case 4 : return p.VMULPD(v[0], v[1], v[2], v[3]) + default : panic("instruction VMULPD takes 3 or 4 operands") + } +} + +func __asm_proxy_VMULPS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VMULPS(v[0], v[1], v[2]) + case 4 : return p.VMULPS(v[0], v[1], v[2], v[3]) + default : panic("instruction VMULPS takes 3 or 4 operands") + } +} + +func __asm_proxy_VMULSD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VMULSD(v[0], v[1], v[2]) + case 4 : return p.VMULSD(v[0], v[1], v[2], v[3]) + default : panic("instruction VMULSD takes 3 or 4 
operands") + } +} + +func __asm_proxy_VMULSS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VMULSS(v[0], v[1], v[2]) + case 4 : return p.VMULSS(v[0], v[1], v[2], v[3]) + default : panic("instruction VMULSS takes 3 or 4 operands") + } +} + +func __asm_proxy_VORPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VORPD(v[0], v[1], v[2]) + } else { + panic("instruction VORPD takes exactly 3 operands") + } +} + +func __asm_proxy_VORPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VORPS(v[0], v[1], v[2]) + } else { + panic("instruction VORPS takes exactly 3 operands") + } +} + +func __asm_proxy_VPABSB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPABSB(v[0], v[1]) + } else { + panic("instruction VPABSB takes exactly 2 operands") + } +} + +func __asm_proxy_VPABSD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPABSD(v[0], v[1]) + } else { + panic("instruction VPABSD takes exactly 2 operands") + } +} + +func __asm_proxy_VPABSQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPABSQ(v[0], v[1]) + } else { + panic("instruction VPABSQ takes exactly 2 operands") + } +} + +func __asm_proxy_VPABSW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPABSW(v[0], v[1]) + } else { + panic("instruction VPABSW takes exactly 2 operands") + } +} + +func __asm_proxy_VPACKSSDW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPACKSSDW(v[0], v[1], v[2]) + } else { + panic("instruction VPACKSSDW takes exactly 3 operands") + } +} + +func __asm_proxy_VPACKSSWB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPACKSSWB(v[0], v[1], v[2]) + } else { + panic("instruction VPACKSSWB takes exactly 3 operands") + } +} + +func __asm_proxy_VPACKUSDW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPACKUSDW(v[0], v[1], v[2]) + } else { + panic("instruction VPACKUSDW takes exactly 3 operands") + } +} + +func __asm_proxy_VPACKUSWB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPACKUSWB(v[0], v[1], v[2]) + } else { + panic("instruction VPACKUSWB takes exactly 3 operands") + } +} + +func __asm_proxy_VPADDB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPADDB(v[0], v[1], v[2]) + } else { + panic("instruction VPADDB takes exactly 3 operands") + } +} + +func __asm_proxy_VPADDD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPADDD(v[0], v[1], v[2]) + } else { + panic("instruction VPADDD takes exactly 3 operands") + } +} + +func __asm_proxy_VPADDQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPADDQ(v[0], v[1], v[2]) + } else { + panic("instruction VPADDQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPADDSB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPADDSB(v[0], v[1], v[2]) + } else { + panic("instruction VPADDSB takes exactly 3 operands") + } +} + +func __asm_proxy_VPADDSW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPADDSW(v[0], v[1], v[2]) + } else { + panic("instruction VPADDSW takes exactly 3 operands") + } +} + +func __asm_proxy_VPADDUSB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPADDUSB(v[0], v[1], v[2]) + } else { + panic("instruction VPADDUSB takes exactly 3 operands") + } +} + +func 
+func __asm_proxy_VPADDUSW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPADDUSW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPADDUSW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPADDW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPADDW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPADDW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPALIGNR__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPALIGNR(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPALIGNR takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPAND__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPAND(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPAND takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPANDD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPANDD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPANDD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPANDN__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPANDN(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPANDN takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPANDND__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPANDND(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPANDND takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPANDNQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPANDNQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPANDNQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPANDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPANDQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPANDQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPAVGB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPAVGB(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPAVGB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPAVGW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPAVGW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPAVGW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPBLENDD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPBLENDD(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPBLENDD takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPBLENDMB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPBLENDMB(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPBLENDMB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPBLENDMD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPBLENDMD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPBLENDMD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPBLENDMQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPBLENDMQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPBLENDMQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPBLENDMW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPBLENDMW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPBLENDMW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPBLENDVB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPBLENDVB(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPBLENDVB takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPBLENDW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPBLENDW(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPBLENDW takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPBROADCASTB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPBROADCASTB(v[0], v[1])
+    } else {
+        panic("instruction VPBROADCASTB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPBROADCASTD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPBROADCASTD(v[0], v[1])
+    } else {
+        panic("instruction VPBROADCASTD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPBROADCASTMB2Q__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPBROADCASTMB2Q(v[0], v[1])
+    } else {
+        panic("instruction VPBROADCASTMB2Q takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPBROADCASTMW2D__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPBROADCASTMW2D(v[0], v[1])
+    } else {
+        panic("instruction VPBROADCASTMW2D takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPBROADCASTQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPBROADCASTQ(v[0], v[1])
+    } else {
+        panic("instruction VPBROADCASTQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPBROADCASTW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPBROADCASTW(v[0], v[1])
+    } else {
+        panic("instruction VPBROADCASTW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPCLMULQDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPCLMULQDQ(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPCLMULQDQ takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPCMOV__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPCMOV(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPCMOV takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPCMPB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPCMPB(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPCMPB takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPCMPD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPCMPD(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPCMPD takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPCMPEQB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPCMPEQB(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPCMPEQB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPCMPEQD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPCMPEQD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPCMPEQD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPCMPEQQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPCMPEQQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPCMPEQQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPCMPEQW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPCMPEQW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPCMPEQW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPCMPESTRI__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPCMPESTRI(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPCMPESTRI takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPCMPESTRM__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPCMPESTRM(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPCMPESTRM takes exactly 3 operands")
3 operands") + } +} + +func __asm_proxy_VPCMPGTB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPCMPGTB(v[0], v[1], v[2]) + } else { + panic("instruction VPCMPGTB takes exactly 3 operands") + } +} + +func __asm_proxy_VPCMPGTD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPCMPGTD(v[0], v[1], v[2]) + } else { + panic("instruction VPCMPGTD takes exactly 3 operands") + } +} + +func __asm_proxy_VPCMPGTQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPCMPGTQ(v[0], v[1], v[2]) + } else { + panic("instruction VPCMPGTQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPCMPGTW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPCMPGTW(v[0], v[1], v[2]) + } else { + panic("instruction VPCMPGTW takes exactly 3 operands") + } +} + +func __asm_proxy_VPCMPISTRI__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPCMPISTRI(v[0], v[1], v[2]) + } else { + panic("instruction VPCMPISTRI takes exactly 3 operands") + } +} + +func __asm_proxy_VPCMPISTRM__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPCMPISTRM(v[0], v[1], v[2]) + } else { + panic("instruction VPCMPISTRM takes exactly 3 operands") + } +} + +func __asm_proxy_VPCMPQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VPCMPQ(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VPCMPQ takes exactly 4 operands") + } +} + +func __asm_proxy_VPCMPUB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VPCMPUB(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VPCMPUB takes exactly 4 operands") + } +} + +func __asm_proxy_VPCMPUD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VPCMPUD(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VPCMPUD takes exactly 4 operands") + } +} + +func __asm_proxy_VPCMPUQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VPCMPUQ(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VPCMPUQ takes exactly 4 operands") + } +} + +func __asm_proxy_VPCMPUW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VPCMPUW(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VPCMPUW takes exactly 4 operands") + } +} + +func __asm_proxy_VPCMPW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VPCMPW(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VPCMPW takes exactly 4 operands") + } +} + +func __asm_proxy_VPCOMB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VPCOMB(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VPCOMB takes exactly 4 operands") + } +} + +func __asm_proxy_VPCOMD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VPCOMD(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VPCOMD takes exactly 4 operands") + } +} + +func __asm_proxy_VPCOMPRESSD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPCOMPRESSD(v[0], v[1]) + } else { + panic("instruction VPCOMPRESSD takes exactly 2 operands") + } +} + +func __asm_proxy_VPCOMPRESSQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPCOMPRESSQ(v[0], v[1]) + } else { + panic("instruction VPCOMPRESSQ takes exactly 2 operands") + } +} + +func __asm_proxy_VPCOMQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VPCOMQ(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VPCOMQ 
+    }
+}
+
+func __asm_proxy_VPCOMUB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPCOMUB(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPCOMUB takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPCOMUD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPCOMUD(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPCOMUD takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPCOMUQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPCOMUQ(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPCOMUQ takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPCOMUW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPCOMUW(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPCOMUW takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPCOMW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPCOMW(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPCOMW takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPCONFLICTD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPCONFLICTD(v[0], v[1])
+    } else {
+        panic("instruction VPCONFLICTD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPCONFLICTQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPCONFLICTQ(v[0], v[1])
+    } else {
+        panic("instruction VPCONFLICTQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPERM2F128__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPERM2F128(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPERM2F128 takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPERM2I128__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPERM2I128(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPERM2I128 takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPERMB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPERMB(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPERMB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPERMD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPERMD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPERMD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPERMI2B__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPERMI2B(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPERMI2B takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPERMI2D__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPERMI2D(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPERMI2D takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPERMI2PD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPERMI2PD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPERMI2PD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPERMI2PS__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPERMI2PS(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPERMI2PS takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPERMI2Q__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPERMI2Q(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPERMI2Q takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPERMI2W__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPERMI2W(v[0], v[1], v[2])
+    } else {
panic("instruction VPERMI2W takes exactly 3 operands") + } +} + +func __asm_proxy_VPERMIL2PD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 5 { + return p.VPERMIL2PD(v[0], v[1], v[2], v[3], v[4]) + } else { + panic("instruction VPERMIL2PD takes exactly 5 operands") + } +} + +func __asm_proxy_VPERMIL2PS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 5 { + return p.VPERMIL2PS(v[0], v[1], v[2], v[3], v[4]) + } else { + panic("instruction VPERMIL2PS takes exactly 5 operands") + } +} + +func __asm_proxy_VPERMILPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPERMILPD(v[0], v[1], v[2]) + } else { + panic("instruction VPERMILPD takes exactly 3 operands") + } +} + +func __asm_proxy_VPERMILPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPERMILPS(v[0], v[1], v[2]) + } else { + panic("instruction VPERMILPS takes exactly 3 operands") + } +} + +func __asm_proxy_VPERMPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPERMPD(v[0], v[1], v[2]) + } else { + panic("instruction VPERMPD takes exactly 3 operands") + } +} + +func __asm_proxy_VPERMPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPERMPS(v[0], v[1], v[2]) + } else { + panic("instruction VPERMPS takes exactly 3 operands") + } +} + +func __asm_proxy_VPERMQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPERMQ(v[0], v[1], v[2]) + } else { + panic("instruction VPERMQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPERMT2B__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPERMT2B(v[0], v[1], v[2]) + } else { + panic("instruction VPERMT2B takes exactly 3 operands") + } +} + +func __asm_proxy_VPERMT2D__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPERMT2D(v[0], v[1], v[2]) + } else { + panic("instruction VPERMT2D takes exactly 3 operands") + } +} + +func __asm_proxy_VPERMT2PD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPERMT2PD(v[0], v[1], v[2]) + } else { + panic("instruction VPERMT2PD takes exactly 3 operands") + } +} + +func __asm_proxy_VPERMT2PS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPERMT2PS(v[0], v[1], v[2]) + } else { + panic("instruction VPERMT2PS takes exactly 3 operands") + } +} + +func __asm_proxy_VPERMT2Q__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPERMT2Q(v[0], v[1], v[2]) + } else { + panic("instruction VPERMT2Q takes exactly 3 operands") + } +} + +func __asm_proxy_VPERMT2W__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPERMT2W(v[0], v[1], v[2]) + } else { + panic("instruction VPERMT2W takes exactly 3 operands") + } +} + +func __asm_proxy_VPERMW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPERMW(v[0], v[1], v[2]) + } else { + panic("instruction VPERMW takes exactly 3 operands") + } +} + +func __asm_proxy_VPEXPANDD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPEXPANDD(v[0], v[1]) + } else { + panic("instruction VPEXPANDD takes exactly 2 operands") + } +} + +func __asm_proxy_VPEXPANDQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPEXPANDQ(v[0], v[1]) + } else { + panic("instruction VPEXPANDQ takes exactly 2 operands") + } +} + +func __asm_proxy_VPEXTRB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPEXTRB(v[0], v[1], v[2]) + } else 
+        panic("instruction VPEXTRB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPEXTRD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPEXTRD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPEXTRD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPEXTRQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPEXTRQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPEXTRQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPEXTRW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPEXTRW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPEXTRW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPGATHERDD__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+        case 2 : return p.VPGATHERDD(v[0], v[1])
+        case 3 : return p.VPGATHERDD(v[0], v[1], v[2])
+        default : panic("instruction VPGATHERDD takes 2 or 3 operands")
+    }
+}
+
+func __asm_proxy_VPGATHERDQ__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+        case 2 : return p.VPGATHERDQ(v[0], v[1])
+        case 3 : return p.VPGATHERDQ(v[0], v[1], v[2])
+        default : panic("instruction VPGATHERDQ takes 2 or 3 operands")
+    }
+}
+
+func __asm_proxy_VPGATHERQD__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+        case 2 : return p.VPGATHERQD(v[0], v[1])
+        case 3 : return p.VPGATHERQD(v[0], v[1], v[2])
+        default : panic("instruction VPGATHERQD takes 2 or 3 operands")
+    }
+}
+
+func __asm_proxy_VPGATHERQQ__(p *Program, v ...interface{}) *Instruction {
+    switch len(v) {
+        case 2 : return p.VPGATHERQQ(v[0], v[1])
+        case 3 : return p.VPGATHERQQ(v[0], v[1], v[2])
+        default : panic("instruction VPGATHERQQ takes 2 or 3 operands")
+    }
+}
+
+func __asm_proxy_VPHADDBD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPHADDBD(v[0], v[1])
+    } else {
+        panic("instruction VPHADDBD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPHADDBQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPHADDBQ(v[0], v[1])
+    } else {
+        panic("instruction VPHADDBQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPHADDBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPHADDBW(v[0], v[1])
+    } else {
+        panic("instruction VPHADDBW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPHADDD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPHADDD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPHADDD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPHADDDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPHADDDQ(v[0], v[1])
+    } else {
+        panic("instruction VPHADDDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPHADDSW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPHADDSW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPHADDSW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPHADDUBD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPHADDUBD(v[0], v[1])
+    } else {
+        panic("instruction VPHADDUBD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPHADDUBQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPHADDUBQ(v[0], v[1])
+    } else {
+        panic("instruction VPHADDUBQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPHADDUBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPHADDUBW(v[0], v[1])
+    } else {
+        panic("instruction VPHADDUBW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPHADDUDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPHADDUDQ(v[0], v[1])
+    } else {
+        panic("instruction VPHADDUDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPHADDUWD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPHADDUWD(v[0], v[1])
+    } else {
+        panic("instruction VPHADDUWD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPHADDUWQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPHADDUWQ(v[0], v[1])
+    } else {
+        panic("instruction VPHADDUWQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPHADDW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPHADDW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPHADDW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPHADDWD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPHADDWD(v[0], v[1])
+    } else {
+        panic("instruction VPHADDWD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPHADDWQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPHADDWQ(v[0], v[1])
+    } else {
+        panic("instruction VPHADDWQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPHMINPOSUW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPHMINPOSUW(v[0], v[1])
+    } else {
+        panic("instruction VPHMINPOSUW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPHSUBBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPHSUBBW(v[0], v[1])
+    } else {
+        panic("instruction VPHSUBBW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPHSUBD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPHSUBD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPHSUBD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPHSUBDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPHSUBDQ(v[0], v[1])
+    } else {
+        panic("instruction VPHSUBDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPHSUBSW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPHSUBSW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPHSUBSW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPHSUBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPHSUBW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPHSUBW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPHSUBWD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPHSUBWD(v[0], v[1])
+    } else {
+        panic("instruction VPHSUBWD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPINSRB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPINSRB(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPINSRB takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPINSRD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPINSRD(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPINSRD takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPINSRQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPINSRQ(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPINSRQ takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPINSRW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPINSRW(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPINSRW takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPLZCNTD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPLZCNTD(v[0], v[1])
+    } else {
+        panic("instruction VPLZCNTD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPLZCNTQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPLZCNTQ(v[0], v[1])
+    } else {
+        panic("instruction VPLZCNTQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMACSDD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPMACSDD(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPMACSDD takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPMACSDQH__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPMACSDQH(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPMACSDQH takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPMACSDQL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPMACSDQL(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPMACSDQL takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPMACSSDD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPMACSSDD(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPMACSSDD takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPMACSSDQH__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPMACSSDQH(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPMACSSDQH takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPMACSSDQL__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPMACSSDQL(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPMACSSDQL takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPMACSSWD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPMACSSWD(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPMACSSWD takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPMACSSWW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPMACSSWW(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPMACSSWW takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPMACSWD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPMACSWD(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPMACSWD takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPMACSWW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPMACSWW(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPMACSWW takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPMADCSSWD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPMADCSSWD(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPMADCSSWD takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPMADCSWD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPMADCSWD(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPMADCSWD takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPMADD52HUQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPMADD52HUQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPMADD52HUQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPMADD52LUQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPMADD52LUQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPMADD52LUQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPMADDUBSW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPMADDUBSW(v[0], v[1], v[2])
+    } else {
panic("instruction VPMADDUBSW takes exactly 3 operands") + } +} + +func __asm_proxy_VPMADDWD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMADDWD(v[0], v[1], v[2]) + } else { + panic("instruction VPMADDWD takes exactly 3 operands") + } +} + +func __asm_proxy_VPMASKMOVD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMASKMOVD(v[0], v[1], v[2]) + } else { + panic("instruction VPMASKMOVD takes exactly 3 operands") + } +} + +func __asm_proxy_VPMASKMOVQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMASKMOVQ(v[0], v[1], v[2]) + } else { + panic("instruction VPMASKMOVQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPMAXSB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMAXSB(v[0], v[1], v[2]) + } else { + panic("instruction VPMAXSB takes exactly 3 operands") + } +} + +func __asm_proxy_VPMAXSD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMAXSD(v[0], v[1], v[2]) + } else { + panic("instruction VPMAXSD takes exactly 3 operands") + } +} + +func __asm_proxy_VPMAXSQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMAXSQ(v[0], v[1], v[2]) + } else { + panic("instruction VPMAXSQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPMAXSW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMAXSW(v[0], v[1], v[2]) + } else { + panic("instruction VPMAXSW takes exactly 3 operands") + } +} + +func __asm_proxy_VPMAXUB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMAXUB(v[0], v[1], v[2]) + } else { + panic("instruction VPMAXUB takes exactly 3 operands") + } +} + +func __asm_proxy_VPMAXUD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMAXUD(v[0], v[1], v[2]) + } else { + panic("instruction VPMAXUD takes exactly 3 operands") + } +} + +func __asm_proxy_VPMAXUQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMAXUQ(v[0], v[1], v[2]) + } else { + panic("instruction VPMAXUQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPMAXUW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMAXUW(v[0], v[1], v[2]) + } else { + panic("instruction VPMAXUW takes exactly 3 operands") + } +} + +func __asm_proxy_VPMINSB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMINSB(v[0], v[1], v[2]) + } else { + panic("instruction VPMINSB takes exactly 3 operands") + } +} + +func __asm_proxy_VPMINSD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMINSD(v[0], v[1], v[2]) + } else { + panic("instruction VPMINSD takes exactly 3 operands") + } +} + +func __asm_proxy_VPMINSQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMINSQ(v[0], v[1], v[2]) + } else { + panic("instruction VPMINSQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPMINSW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMINSW(v[0], v[1], v[2]) + } else { + panic("instruction VPMINSW takes exactly 3 operands") + } +} + +func __asm_proxy_VPMINUB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMINUB(v[0], v[1], v[2]) + } else { + panic("instruction VPMINUB takes exactly 3 operands") + } +} + +func __asm_proxy_VPMINUD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMINUD(v[0], v[1], v[2]) + } else { + panic("instruction VPMINUD takes exactly 3 
operands") + } +} + +func __asm_proxy_VPMINUQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMINUQ(v[0], v[1], v[2]) + } else { + panic("instruction VPMINUQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPMINUW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMINUW(v[0], v[1], v[2]) + } else { + panic("instruction VPMINUW takes exactly 3 operands") + } +} + +func __asm_proxy_VPMOVB2M__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVB2M(v[0], v[1]) + } else { + panic("instruction VPMOVB2M takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVD2M__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVD2M(v[0], v[1]) + } else { + panic("instruction VPMOVD2M takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVDB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVDB(v[0], v[1]) + } else { + panic("instruction VPMOVDB takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVDW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVDW(v[0], v[1]) + } else { + panic("instruction VPMOVDW takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVM2B__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVM2B(v[0], v[1]) + } else { + panic("instruction VPMOVM2B takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVM2D__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVM2D(v[0], v[1]) + } else { + panic("instruction VPMOVM2D takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVM2Q__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVM2Q(v[0], v[1]) + } else { + panic("instruction VPMOVM2Q takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVM2W__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVM2W(v[0], v[1]) + } else { + panic("instruction VPMOVM2W takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVMSKB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVMSKB(v[0], v[1]) + } else { + panic("instruction VPMOVMSKB takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVQ2M__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVQ2M(v[0], v[1]) + } else { + panic("instruction VPMOVQ2M takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVQB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVQB(v[0], v[1]) + } else { + panic("instruction VPMOVQB takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVQD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVQD(v[0], v[1]) + } else { + panic("instruction VPMOVQD takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVQW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVQW(v[0], v[1]) + } else { + panic("instruction VPMOVQW takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVSDB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVSDB(v[0], v[1]) + } else { + panic("instruction VPMOVSDB takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVSDW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVSDW(v[0], v[1]) + } else { + panic("instruction VPMOVSDW takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVSQB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return 
+    } else {
+        panic("instruction VPMOVSQB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVSQD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVSQD(v[0], v[1])
+    } else {
+        panic("instruction VPMOVSQD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVSQW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVSQW(v[0], v[1])
+    } else {
+        panic("instruction VPMOVSQW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVSWB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVSWB(v[0], v[1])
+    } else {
+        panic("instruction VPMOVSWB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVSXBD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVSXBD(v[0], v[1])
+    } else {
+        panic("instruction VPMOVSXBD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVSXBQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVSXBQ(v[0], v[1])
+    } else {
+        panic("instruction VPMOVSXBQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVSXBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVSXBW(v[0], v[1])
+    } else {
+        panic("instruction VPMOVSXBW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVSXDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVSXDQ(v[0], v[1])
+    } else {
+        panic("instruction VPMOVSXDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVSXWD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVSXWD(v[0], v[1])
+    } else {
+        panic("instruction VPMOVSXWD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVSXWQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVSXWQ(v[0], v[1])
+    } else {
+        panic("instruction VPMOVSXWQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVUSDB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVUSDB(v[0], v[1])
+    } else {
+        panic("instruction VPMOVUSDB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVUSDW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVUSDW(v[0], v[1])
+    } else {
+        panic("instruction VPMOVUSDW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVUSQB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVUSQB(v[0], v[1])
+    } else {
+        panic("instruction VPMOVUSQB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVUSQD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVUSQD(v[0], v[1])
+    } else {
+        panic("instruction VPMOVUSQD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVUSQW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVUSQW(v[0], v[1])
+    } else {
+        panic("instruction VPMOVUSQW takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVUSWB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVUSWB(v[0], v[1])
+    } else {
+        panic("instruction VPMOVUSWB takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVW2M__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVW2M(v[0], v[1])
+    } else {
+        panic("instruction VPMOVW2M takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPMOVWB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPMOVWB(v[0], v[1])
+    } else {
+        panic("instruction VPMOVWB takes exactly 2 operands")
operands") + } +} + +func __asm_proxy_VPMOVZXBD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVZXBD(v[0], v[1]) + } else { + panic("instruction VPMOVZXBD takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVZXBQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVZXBQ(v[0], v[1]) + } else { + panic("instruction VPMOVZXBQ takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVZXBW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVZXBW(v[0], v[1]) + } else { + panic("instruction VPMOVZXBW takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVZXDQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVZXDQ(v[0], v[1]) + } else { + panic("instruction VPMOVZXDQ takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVZXWD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVZXWD(v[0], v[1]) + } else { + panic("instruction VPMOVZXWD takes exactly 2 operands") + } +} + +func __asm_proxy_VPMOVZXWQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPMOVZXWQ(v[0], v[1]) + } else { + panic("instruction VPMOVZXWQ takes exactly 2 operands") + } +} + +func __asm_proxy_VPMULDQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMULDQ(v[0], v[1], v[2]) + } else { + panic("instruction VPMULDQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPMULHRSW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMULHRSW(v[0], v[1], v[2]) + } else { + panic("instruction VPMULHRSW takes exactly 3 operands") + } +} + +func __asm_proxy_VPMULHUW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMULHUW(v[0], v[1], v[2]) + } else { + panic("instruction VPMULHUW takes exactly 3 operands") + } +} + +func __asm_proxy_VPMULHW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMULHW(v[0], v[1], v[2]) + } else { + panic("instruction VPMULHW takes exactly 3 operands") + } +} + +func __asm_proxy_VPMULLD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMULLD(v[0], v[1], v[2]) + } else { + panic("instruction VPMULLD takes exactly 3 operands") + } +} + +func __asm_proxy_VPMULLQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMULLQ(v[0], v[1], v[2]) + } else { + panic("instruction VPMULLQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPMULLW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMULLW(v[0], v[1], v[2]) + } else { + panic("instruction VPMULLW takes exactly 3 operands") + } +} + +func __asm_proxy_VPMULTISHIFTQB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMULTISHIFTQB(v[0], v[1], v[2]) + } else { + panic("instruction VPMULTISHIFTQB takes exactly 3 operands") + } +} + +func __asm_proxy_VPMULUDQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPMULUDQ(v[0], v[1], v[2]) + } else { + panic("instruction VPMULUDQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPOPCNTD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPOPCNTD(v[0], v[1]) + } else { + panic("instruction VPOPCNTD takes exactly 2 operands") + } +} + +func __asm_proxy_VPOPCNTQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VPOPCNTQ(v[0], v[1]) + } else { + panic("instruction VPOPCNTQ takes exactly 2 operands") + } +} + +func 
+func __asm_proxy_VPOR__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPOR(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPOR takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPORD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPORD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPORD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPORQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPORQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPORQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPPERM__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPPERM(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPPERM takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPROLD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPROLD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPROLD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPROLQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPROLQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPROLQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPROLVD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPROLVD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPROLVD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPROLVQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPROLVQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPROLVQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPRORD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPRORD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPRORD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPRORQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPRORQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPRORQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPRORVD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPRORVD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPRORVD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPRORVQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPRORVQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPRORVQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPROTB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPROTB(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPROTB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPROTD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPROTD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPROTD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPROTQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPROTQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPROTQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPROTW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPROTW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPROTW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSADBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSADBW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSADBW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSCATTERDD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPSCATTERDD(v[0], v[1])
+    } else {
+        panic("instruction VPSCATTERDD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPSCATTERDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPSCATTERDQ(v[0], v[1])
+    } else {
+        panic("instruction VPSCATTERDQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPSCATTERQD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPSCATTERQD(v[0], v[1])
+    } else {
+        panic("instruction VPSCATTERQD takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPSCATTERQQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPSCATTERQQ(v[0], v[1])
+    } else {
+        panic("instruction VPSCATTERQQ takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPSHAB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSHAB(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSHAB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSHAD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSHAD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSHAD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSHAQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSHAQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSHAQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSHAW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSHAW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSHAW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSHLB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSHLB(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSHLB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSHLD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSHLD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSHLD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSHLQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSHLQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSHLQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSHLW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSHLW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSHLW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSHUFB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSHUFB(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSHUFB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSHUFD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSHUFD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSHUFD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSHUFHW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSHUFHW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSHUFHW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSHUFLW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSHUFLW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSHUFLW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSIGNB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSIGNB(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSIGNB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSIGND__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSIGND(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSIGND takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSIGNW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSIGNW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSIGNW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSLLD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSLLD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSLLD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSLLDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSLLDQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSLLDQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSLLQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSLLQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSLLQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSLLVD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSLLVD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSLLVD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSLLVQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSLLVQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSLLVQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSLLVW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSLLVW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSLLVW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSLLW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSLLW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSLLW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSRAD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSRAD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSRAD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSRAQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSRAQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSRAQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSRAVD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSRAVD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSRAVD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSRAVQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSRAVQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSRAVQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSRAVW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSRAVW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSRAVW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSRAW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSRAW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSRAW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSRLD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSRLD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSRLD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSRLDQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSRLDQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSRLDQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSRLQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSRLQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSRLQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSRLVD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSRLVD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSRLVD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSRLVQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSRLVQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSRLVQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSRLVW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSRLVW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSRLVW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSRLW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSRLW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSRLW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSUBB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSUBB(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSUBB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSUBD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSUBD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSUBD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSUBQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSUBQ(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSUBQ takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSUBSB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSUBSB(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSUBSB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSUBSW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSUBSW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSUBSW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSUBUSB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSUBUSB(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSUBUSB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSUBUSW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSUBUSW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSUBUSW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPSUBW__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPSUBW(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPSUBW takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPTERNLOGD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPTERNLOGD(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPTERNLOGD takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPTERNLOGQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 4 {
+        return p.VPTERNLOGQ(v[0], v[1], v[2], v[3])
+    } else {
+        panic("instruction VPTERNLOGQ takes exactly 4 operands")
+    }
+}
+
+func __asm_proxy_VPTEST__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 2 {
+        return p.VPTEST(v[0], v[1])
+    } else {
+        panic("instruction VPTEST takes exactly 2 operands")
+    }
+}
+
+func __asm_proxy_VPTESTMB__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPTESTMB(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPTESTMB takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPTESTMD__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPTESTMD(v[0], v[1], v[2])
+    } else {
+        panic("instruction VPTESTMD takes exactly 3 operands")
+    }
+}
+
+func __asm_proxy_VPTESTMQ__(p *Program, v ...interface{}) *Instruction {
+    if len(v) == 3 {
+        return p.VPTESTMQ(v[0], v[1], v[2])
p.VPTESTMQ(v[0], v[1], v[2]) + } else { + panic("instruction VPTESTMQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPTESTMW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPTESTMW(v[0], v[1], v[2]) + } else { + panic("instruction VPTESTMW takes exactly 3 operands") + } +} + +func __asm_proxy_VPTESTNMB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPTESTNMB(v[0], v[1], v[2]) + } else { + panic("instruction VPTESTNMB takes exactly 3 operands") + } +} + +func __asm_proxy_VPTESTNMD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPTESTNMD(v[0], v[1], v[2]) + } else { + panic("instruction VPTESTNMD takes exactly 3 operands") + } +} + +func __asm_proxy_VPTESTNMQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPTESTNMQ(v[0], v[1], v[2]) + } else { + panic("instruction VPTESTNMQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPTESTNMW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPTESTNMW(v[0], v[1], v[2]) + } else { + panic("instruction VPTESTNMW takes exactly 3 operands") + } +} + +func __asm_proxy_VPUNPCKHBW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPUNPCKHBW(v[0], v[1], v[2]) + } else { + panic("instruction VPUNPCKHBW takes exactly 3 operands") + } +} + +func __asm_proxy_VPUNPCKHDQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPUNPCKHDQ(v[0], v[1], v[2]) + } else { + panic("instruction VPUNPCKHDQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPUNPCKHQDQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPUNPCKHQDQ(v[0], v[1], v[2]) + } else { + panic("instruction VPUNPCKHQDQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPUNPCKHWD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPUNPCKHWD(v[0], v[1], v[2]) + } else { + panic("instruction VPUNPCKHWD takes exactly 3 operands") + } +} + +func __asm_proxy_VPUNPCKLBW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPUNPCKLBW(v[0], v[1], v[2]) + } else { + panic("instruction VPUNPCKLBW takes exactly 3 operands") + } +} + +func __asm_proxy_VPUNPCKLDQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPUNPCKLDQ(v[0], v[1], v[2]) + } else { + panic("instruction VPUNPCKLDQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPUNPCKLQDQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPUNPCKLQDQ(v[0], v[1], v[2]) + } else { + panic("instruction VPUNPCKLQDQ takes exactly 3 operands") + } +} + +func __asm_proxy_VPUNPCKLWD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPUNPCKLWD(v[0], v[1], v[2]) + } else { + panic("instruction VPUNPCKLWD takes exactly 3 operands") + } +} + +func __asm_proxy_VPXOR__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPXOR(v[0], v[1], v[2]) + } else { + panic("instruction VPXOR takes exactly 3 operands") + } +} + +func __asm_proxy_VPXORD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPXORD(v[0], v[1], v[2]) + } else { + panic("instruction VPXORD takes exactly 3 operands") + } +} + +func __asm_proxy_VPXORQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VPXORQ(v[0], v[1], v[2]) + } else { + panic("instruction VPXORQ takes exactly 3 operands") + } +} + +func __asm_proxy_VRANGEPD__(p *Program, v ...interface{}) *Instruction { + 
switch len(v) { + case 4 : return p.VRANGEPD(v[0], v[1], v[2], v[3]) + case 5 : return p.VRANGEPD(v[0], v[1], v[2], v[3], v[4]) + default : panic("instruction VRANGEPD takes 4 or 5 operands") + } +} + +func __asm_proxy_VRANGEPS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 4 : return p.VRANGEPS(v[0], v[1], v[2], v[3]) + case 5 : return p.VRANGEPS(v[0], v[1], v[2], v[3], v[4]) + default : panic("instruction VRANGEPS takes 4 or 5 operands") + } +} + +func __asm_proxy_VRANGESD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 4 : return p.VRANGESD(v[0], v[1], v[2], v[3]) + case 5 : return p.VRANGESD(v[0], v[1], v[2], v[3], v[4]) + default : panic("instruction VRANGESD takes 4 or 5 operands") + } +} + +func __asm_proxy_VRANGESS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 4 : return p.VRANGESS(v[0], v[1], v[2], v[3]) + case 5 : return p.VRANGESS(v[0], v[1], v[2], v[3], v[4]) + default : panic("instruction VRANGESS takes 4 or 5 operands") + } +} + +func __asm_proxy_VRCP14PD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VRCP14PD(v[0], v[1]) + } else { + panic("instruction VRCP14PD takes exactly 2 operands") + } +} + +func __asm_proxy_VRCP14PS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VRCP14PS(v[0], v[1]) + } else { + panic("instruction VRCP14PS takes exactly 2 operands") + } +} + +func __asm_proxy_VRCP14SD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VRCP14SD(v[0], v[1], v[2]) + } else { + panic("instruction VRCP14SD takes exactly 3 operands") + } +} + +func __asm_proxy_VRCP14SS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VRCP14SS(v[0], v[1], v[2]) + } else { + panic("instruction VRCP14SS takes exactly 3 operands") + } +} + +func __asm_proxy_VRCP28PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VRCP28PD(v[0], v[1]) + case 3 : return p.VRCP28PD(v[0], v[1], v[2]) + default : panic("instruction VRCP28PD takes 2 or 3 operands") + } +} + +func __asm_proxy_VRCP28PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VRCP28PS(v[0], v[1]) + case 3 : return p.VRCP28PS(v[0], v[1], v[2]) + default : panic("instruction VRCP28PS takes 2 or 3 operands") + } +} + +func __asm_proxy_VRCP28SD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VRCP28SD(v[0], v[1], v[2]) + case 4 : return p.VRCP28SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VRCP28SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VRCP28SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VRCP28SS(v[0], v[1], v[2]) + case 4 : return p.VRCP28SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VRCP28SS takes 3 or 4 operands") + } +} + +func __asm_proxy_VRCPPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VRCPPS(v[0], v[1]) + } else { + panic("instruction VRCPPS takes exactly 2 operands") + } +} + +func __asm_proxy_VRCPSS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VRCPSS(v[0], v[1], v[2]) + } else { + panic("instruction VRCPSS takes exactly 3 operands") + } +} + +func __asm_proxy_VREDUCEPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VREDUCEPD(v[0], v[1], v[2]) + } else { + panic("instruction VREDUCEPD takes exactly 3 operands") + } +} + +func __asm_proxy_VREDUCEPS__(p *Program, v 
...interface{}) *Instruction { + if len(v) == 3 { + return p.VREDUCEPS(v[0], v[1], v[2]) + } else { + panic("instruction VREDUCEPS takes exactly 3 operands") + } +} + +func __asm_proxy_VREDUCESD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VREDUCESD(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VREDUCESD takes exactly 4 operands") + } +} + +func __asm_proxy_VREDUCESS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VREDUCESS(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VREDUCESS takes exactly 4 operands") + } +} + +func __asm_proxy_VRNDSCALEPD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VRNDSCALEPD(v[0], v[1], v[2]) + case 4 : return p.VRNDSCALEPD(v[0], v[1], v[2], v[3]) + default : panic("instruction VRNDSCALEPD takes 3 or 4 operands") + } +} + +func __asm_proxy_VRNDSCALEPS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VRNDSCALEPS(v[0], v[1], v[2]) + case 4 : return p.VRNDSCALEPS(v[0], v[1], v[2], v[3]) + default : panic("instruction VRNDSCALEPS takes 3 or 4 operands") + } +} + +func __asm_proxy_VRNDSCALESD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 4 : return p.VRNDSCALESD(v[0], v[1], v[2], v[3]) + case 5 : return p.VRNDSCALESD(v[0], v[1], v[2], v[3], v[4]) + default : panic("instruction VRNDSCALESD takes 4 or 5 operands") + } +} + +func __asm_proxy_VRNDSCALESS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 4 : return p.VRNDSCALESS(v[0], v[1], v[2], v[3]) + case 5 : return p.VRNDSCALESS(v[0], v[1], v[2], v[3], v[4]) + default : panic("instruction VRNDSCALESS takes 4 or 5 operands") + } +} + +func __asm_proxy_VROUNDPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VROUNDPD(v[0], v[1], v[2]) + } else { + panic("instruction VROUNDPD takes exactly 3 operands") + } +} + +func __asm_proxy_VROUNDPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VROUNDPS(v[0], v[1], v[2]) + } else { + panic("instruction VROUNDPS takes exactly 3 operands") + } +} + +func __asm_proxy_VROUNDSD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VROUNDSD(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VROUNDSD takes exactly 4 operands") + } +} + +func __asm_proxy_VROUNDSS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VROUNDSS(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VROUNDSS takes exactly 4 operands") + } +} + +func __asm_proxy_VRSQRT14PD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VRSQRT14PD(v[0], v[1]) + } else { + panic("instruction VRSQRT14PD takes exactly 2 operands") + } +} + +func __asm_proxy_VRSQRT14PS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VRSQRT14PS(v[0], v[1]) + } else { + panic("instruction VRSQRT14PS takes exactly 2 operands") + } +} + +func __asm_proxy_VRSQRT14SD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VRSQRT14SD(v[0], v[1], v[2]) + } else { + panic("instruction VRSQRT14SD takes exactly 3 operands") + } +} + +func __asm_proxy_VRSQRT14SS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VRSQRT14SS(v[0], v[1], v[2]) + } else { + panic("instruction VRSQRT14SS takes exactly 3 operands") + } +} + +func __asm_proxy_VRSQRT28PD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VRSQRT28PD(v[0], 
v[1]) + case 3 : return p.VRSQRT28PD(v[0], v[1], v[2]) + default : panic("instruction VRSQRT28PD takes 2 or 3 operands") + } +} + +func __asm_proxy_VRSQRT28PS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VRSQRT28PS(v[0], v[1]) + case 3 : return p.VRSQRT28PS(v[0], v[1], v[2]) + default : panic("instruction VRSQRT28PS takes 2 or 3 operands") + } +} + +func __asm_proxy_VRSQRT28SD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VRSQRT28SD(v[0], v[1], v[2]) + case 4 : return p.VRSQRT28SD(v[0], v[1], v[2], v[3]) + default : panic("instruction VRSQRT28SD takes 3 or 4 operands") + } +} + +func __asm_proxy_VRSQRT28SS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VRSQRT28SS(v[0], v[1], v[2]) + case 4 : return p.VRSQRT28SS(v[0], v[1], v[2], v[3]) + default : panic("instruction VRSQRT28SS takes 3 or 4 operands") + } +} + +func __asm_proxy_VRSQRTPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VRSQRTPS(v[0], v[1]) + } else { + panic("instruction VRSQRTPS takes exactly 2 operands") + } +} + +func __asm_proxy_VRSQRTSS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VRSQRTSS(v[0], v[1], v[2]) + } else { + panic("instruction VRSQRTSS takes exactly 3 operands") + } +} + +func __asm_proxy_VSCALEFPD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VSCALEFPD(v[0], v[1], v[2]) + case 4 : return p.VSCALEFPD(v[0], v[1], v[2], v[3]) + default : panic("instruction VSCALEFPD takes 3 or 4 operands") + } +} + +func __asm_proxy_VSCALEFPS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VSCALEFPS(v[0], v[1], v[2]) + case 4 : return p.VSCALEFPS(v[0], v[1], v[2], v[3]) + default : panic("instruction VSCALEFPS takes 3 or 4 operands") + } +} + +func __asm_proxy_VSCALEFSD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VSCALEFSD(v[0], v[1], v[2]) + case 4 : return p.VSCALEFSD(v[0], v[1], v[2], v[3]) + default : panic("instruction VSCALEFSD takes 3 or 4 operands") + } +} + +func __asm_proxy_VSCALEFSS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VSCALEFSS(v[0], v[1], v[2]) + case 4 : return p.VSCALEFSS(v[0], v[1], v[2], v[3]) + default : panic("instruction VSCALEFSS takes 3 or 4 operands") + } +} + +func __asm_proxy_VSCATTERDPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VSCATTERDPD(v[0], v[1]) + } else { + panic("instruction VSCATTERDPD takes exactly 2 operands") + } +} + +func __asm_proxy_VSCATTERDPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VSCATTERDPS(v[0], v[1]) + } else { + panic("instruction VSCATTERDPS takes exactly 2 operands") + } +} + +func __asm_proxy_VSCATTERPF0DPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.VSCATTERPF0DPD(v[0]) + } else { + panic("instruction VSCATTERPF0DPD takes exactly 1 operand") + } +} + +func __asm_proxy_VSCATTERPF0DPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.VSCATTERPF0DPS(v[0]) + } else { + panic("instruction VSCATTERPF0DPS takes exactly 1 operand") + } +} + +func __asm_proxy_VSCATTERPF0QPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.VSCATTERPF0QPD(v[0]) + } else { + panic("instruction VSCATTERPF0QPD takes exactly 1 operand") + } +} + +func __asm_proxy_VSCATTERPF0QPS__(p *Program, v ...interface{}) 
*Instruction { + if len(v) == 1 { + return p.VSCATTERPF0QPS(v[0]) + } else { + panic("instruction VSCATTERPF0QPS takes exactly 1 operand") + } +} + +func __asm_proxy_VSCATTERPF1DPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.VSCATTERPF1DPD(v[0]) + } else { + panic("instruction VSCATTERPF1DPD takes exactly 1 operand") + } +} + +func __asm_proxy_VSCATTERPF1DPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.VSCATTERPF1DPS(v[0]) + } else { + panic("instruction VSCATTERPF1DPS takes exactly 1 operand") + } +} + +func __asm_proxy_VSCATTERPF1QPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.VSCATTERPF1QPD(v[0]) + } else { + panic("instruction VSCATTERPF1QPD takes exactly 1 operand") + } +} + +func __asm_proxy_VSCATTERPF1QPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.VSCATTERPF1QPS(v[0]) + } else { + panic("instruction VSCATTERPF1QPS takes exactly 1 operand") + } +} + +func __asm_proxy_VSCATTERQPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VSCATTERQPD(v[0], v[1]) + } else { + panic("instruction VSCATTERQPD takes exactly 2 operands") + } +} + +func __asm_proxy_VSCATTERQPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VSCATTERQPS(v[0], v[1]) + } else { + panic("instruction VSCATTERQPS takes exactly 2 operands") + } +} + +func __asm_proxy_VSHUFF32X4__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VSHUFF32X4(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VSHUFF32X4 takes exactly 4 operands") + } +} + +func __asm_proxy_VSHUFF64X2__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VSHUFF64X2(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VSHUFF64X2 takes exactly 4 operands") + } +} + +func __asm_proxy_VSHUFI32X4__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VSHUFI32X4(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VSHUFI32X4 takes exactly 4 operands") + } +} + +func __asm_proxy_VSHUFI64X2__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VSHUFI64X2(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VSHUFI64X2 takes exactly 4 operands") + } +} + +func __asm_proxy_VSHUFPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VSHUFPD(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VSHUFPD takes exactly 4 operands") + } +} + +func __asm_proxy_VSHUFPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 4 { + return p.VSHUFPS(v[0], v[1], v[2], v[3]) + } else { + panic("instruction VSHUFPS takes exactly 4 operands") + } +} + +func __asm_proxy_VSQRTPD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VSQRTPD(v[0], v[1]) + case 3 : return p.VSQRTPD(v[0], v[1], v[2]) + default : panic("instruction VSQRTPD takes 2 or 3 operands") + } +} + +func __asm_proxy_VSQRTPS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VSQRTPS(v[0], v[1]) + case 3 : return p.VSQRTPS(v[0], v[1], v[2]) + default : panic("instruction VSQRTPS takes 2 or 3 operands") + } +} + +func __asm_proxy_VSQRTSD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VSQRTSD(v[0], v[1], v[2]) + case 4 : return p.VSQRTSD(v[0], v[1], v[2], v[3]) + default : panic("instruction VSQRTSD takes 3 or 4 operands") + } +} + +func __asm_proxy_VSQRTSS__(p *Program, v ...interface{}) *Instruction 
{ + switch len(v) { + case 3 : return p.VSQRTSS(v[0], v[1], v[2]) + case 4 : return p.VSQRTSS(v[0], v[1], v[2], v[3]) + default : panic("instruction VSQRTSS takes 3 or 4 operands") + } +} + +func __asm_proxy_VSTMXCSR__(p *Program, v ...interface{}) *Instruction { + if len(v) == 1 { + return p.VSTMXCSR(v[0]) + } else { + panic("instruction VSTMXCSR takes exactly 1 operand") + } +} + +func __asm_proxy_VSUBPD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VSUBPD(v[0], v[1], v[2]) + case 4 : return p.VSUBPD(v[0], v[1], v[2], v[3]) + default : panic("instruction VSUBPD takes 3 or 4 operands") + } +} + +func __asm_proxy_VSUBPS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VSUBPS(v[0], v[1], v[2]) + case 4 : return p.VSUBPS(v[0], v[1], v[2], v[3]) + default : panic("instruction VSUBPS takes 3 or 4 operands") + } +} + +func __asm_proxy_VSUBSD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VSUBSD(v[0], v[1], v[2]) + case 4 : return p.VSUBSD(v[0], v[1], v[2], v[3]) + default : panic("instruction VSUBSD takes 3 or 4 operands") + } +} + +func __asm_proxy_VSUBSS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 3 : return p.VSUBSS(v[0], v[1], v[2]) + case 4 : return p.VSUBSS(v[0], v[1], v[2], v[3]) + default : panic("instruction VSUBSS takes 3 or 4 operands") + } +} + +func __asm_proxy_VTESTPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VTESTPD(v[0], v[1]) + } else { + panic("instruction VTESTPD takes exactly 2 operands") + } +} + +func __asm_proxy_VTESTPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.VTESTPS(v[0], v[1]) + } else { + panic("instruction VTESTPS takes exactly 2 operands") + } +} + +func __asm_proxy_VUCOMISD__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VUCOMISD(v[0], v[1]) + case 3 : return p.VUCOMISD(v[0], v[1], v[2]) + default : panic("instruction VUCOMISD takes 2 or 3 operands") + } +} + +func __asm_proxy_VUCOMISS__(p *Program, v ...interface{}) *Instruction { + switch len(v) { + case 2 : return p.VUCOMISS(v[0], v[1]) + case 3 : return p.VUCOMISS(v[0], v[1], v[2]) + default : panic("instruction VUCOMISS takes 2 or 3 operands") + } +} + +func __asm_proxy_VUNPCKHPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VUNPCKHPD(v[0], v[1], v[2]) + } else { + panic("instruction VUNPCKHPD takes exactly 3 operands") + } +} + +func __asm_proxy_VUNPCKHPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VUNPCKHPS(v[0], v[1], v[2]) + } else { + panic("instruction VUNPCKHPS takes exactly 3 operands") + } +} + +func __asm_proxy_VUNPCKLPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VUNPCKLPD(v[0], v[1], v[2]) + } else { + panic("instruction VUNPCKLPD takes exactly 3 operands") + } +} + +func __asm_proxy_VUNPCKLPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VUNPCKLPS(v[0], v[1], v[2]) + } else { + panic("instruction VUNPCKLPS takes exactly 3 operands") + } +} + +func __asm_proxy_VXORPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VXORPD(v[0], v[1], v[2]) + } else { + panic("instruction VXORPD takes exactly 3 operands") + } +} + +func __asm_proxy_VXORPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 3 { + return p.VXORPS(v[0], v[1], v[2]) + } else { + panic("instruction VXORPS takes exactly 3 
operands") + } +} + +func __asm_proxy_VZEROALL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 0 { + return p.VZEROALL() + } else { + panic("instruction VZEROALL takes no operands") + } +} + +func __asm_proxy_VZEROUPPER__(p *Program, v ...interface{}) *Instruction { + if len(v) == 0 { + return p.VZEROUPPER() + } else { + panic("instruction VZEROUPPER takes no operands") + } +} + +func __asm_proxy_XADDB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.XADDB(v[0], v[1]) + } else { + panic("instruction XADDB takes exactly 2 operands") + } +} + +func __asm_proxy_XADDL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.XADDL(v[0], v[1]) + } else { + panic("instruction XADDL takes exactly 2 operands") + } +} + +func __asm_proxy_XADDQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.XADDQ(v[0], v[1]) + } else { + panic("instruction XADDQ takes exactly 2 operands") + } +} + +func __asm_proxy_XADDW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.XADDW(v[0], v[1]) + } else { + panic("instruction XADDW takes exactly 2 operands") + } +} + +func __asm_proxy_XCHGB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.XCHGB(v[0], v[1]) + } else { + panic("instruction XCHGB takes exactly 2 operands") + } +} + +func __asm_proxy_XCHGL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.XCHGL(v[0], v[1]) + } else { + panic("instruction XCHGL takes exactly 2 operands") + } +} + +func __asm_proxy_XCHGQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.XCHGQ(v[0], v[1]) + } else { + panic("instruction XCHGQ takes exactly 2 operands") + } +} + +func __asm_proxy_XCHGW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.XCHGW(v[0], v[1]) + } else { + panic("instruction XCHGW takes exactly 2 operands") + } +} + +func __asm_proxy_XGETBV__(p *Program, v ...interface{}) *Instruction { + if len(v) == 0 { + return p.XGETBV() + } else { + panic("instruction XGETBV takes no operands") + } +} + +func __asm_proxy_XLATB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 0 { + return p.XLATB() + } else { + panic("instruction XLATB takes no operands") + } +} + +func __asm_proxy_XORB__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.XORB(v[0], v[1]) + } else { + panic("instruction XORB takes exactly 2 operands") + } +} + +func __asm_proxy_XORL__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.XORL(v[0], v[1]) + } else { + panic("instruction XORL takes exactly 2 operands") + } +} + +func __asm_proxy_XORPD__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.XORPD(v[0], v[1]) + } else { + panic("instruction XORPD takes exactly 2 operands") + } +} + +func __asm_proxy_XORPS__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.XORPS(v[0], v[1]) + } else { + panic("instruction XORPS takes exactly 2 operands") + } +} + +func __asm_proxy_XORQ__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.XORQ(v[0], v[1]) + } else { + panic("instruction XORQ takes exactly 2 operands") + } +} + +func __asm_proxy_XORW__(p *Program, v ...interface{}) *Instruction { + if len(v) == 2 { + return p.XORW(v[0], v[1]) + } else { + panic("instruction XORW takes exactly 2 operands") + } +} diff --git a/vendor/github.com/cloudwego/iasm/x86_64/operands.go 
b/vendor/github.com/cloudwego/iasm/x86_64/operands.go new file mode 100644 index 000000000..61adbf83b --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/x86_64/operands.go @@ -0,0 +1,665 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package x86_64 + +import ( + "errors" + "fmt" + "math" + "reflect" + "strconv" + "strings" + "sync/atomic" +) + +// RelativeOffset represents an RIP-relative offset. +type RelativeOffset int32 + +// String implements the fmt.Stringer interface. +func (self RelativeOffset) String() string { + if self == 0 { + return "(%rip)" + } else { + return fmt.Sprintf("%d(%%rip)", self) + } +} + +// RoundingControl represents a floating-point rounding option. +type RoundingControl uint8 + +const ( + // RN_SAE represents "Round Nearest", which is the default rounding option. + RN_SAE RoundingControl = iota + + // RD_SAE represents "Round Down". + RD_SAE + + // RU_SAE represents "Round Up". + RU_SAE + + // RZ_SAE represents "Round towards Zero". + RZ_SAE +) + +var _RC_NAMES = map[RoundingControl]string{ + RN_SAE: "rn-sae", + RD_SAE: "rd-sae", + RU_SAE: "ru-sae", + RZ_SAE: "rz-sae", +} + +func (self RoundingControl) String() string { + if v, ok := _RC_NAMES[self]; ok { + return v + } else { + panic("invalid RoundingControl value") + } +} + +// ExceptionControl represents the "Suppress All Exceptions" flag. +type ExceptionControl uint8 + +const ( + // SAE represents the flag "Suppress All Exceptions" for floating point operations. + SAE ExceptionControl = iota +) + +func (ExceptionControl) String() string { + return "sae" +} + +// AddressType indicates which kind of value that an Addressable object contains. +type AddressType uint + +const ( + // None indicates the Addressable does not contain any addressable value. + None AddressType = iota + + // Memory indicates the Addressable contains a memory address. + Memory + + // Offset indicates the Addressable contains an RIP-relative offset. + Offset + + // Reference indicates the Addressable contains a label reference. + Reference +) + +// Disposable is a type of object that can be Free'd manually. +type Disposable interface { + Free() +} + +// Label represents a location within the program. +type Label struct { + refs int64 + Name string + Dest *Instruction +} + +func (self *Label) offset(p uintptr, n int) RelativeOffset { + if self.Dest == nil { + panic("unresolved label: " + self.Name) + } else { + return RelativeOffset(self.Dest.pc - p - uintptr(n)) + } +} + +// Free decreases the reference count of a Label, if the +// refcount drops to 0, the Label will be recycled. +func (self *Label) Free() { + if atomic.AddInt64(&self.refs, -1) == 0 { + //freeLabel(self) + } +} + +// String implements the fmt.Stringer interface. +func (self *Label) String() string { + if self.Dest == nil { + return fmt.Sprintf("%s(%%rip)", self.Name) + } else { + return fmt.Sprintf("%s(%%rip)@%#x", self.Name, self.Dest.pc) + } +} + +// Retain increases the reference count of a Label. 
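A quick usage sketch of the Label lifecycle (an editor's illustration, not part of the vendored sources): CreateLabel is defined in pools.go and Program.Link/AssembleAndFree in program.go, both later in this diff; CreateArch, CreateProgram, NOP and JMP are assumed from parts of the package outside this hunk.

    // Sketch: pin a label, branch back to it, then assemble to machine code.
    lb := x86_64.CreateLabel("loop")         // refcount starts at 1
    p := x86_64.CreateArch().CreateProgram() // assumed arch.go constructors
    p.Link(lb)                               // pins lb at the current position (a .nop)
    p.NOP()                                  // assumed: any encodable instruction works here
    p.JMP(lb)                                // label operand, resolved during assembly
    code := p.AssembleAndFree(0)             // far branches shrink to near ones where possible
    _ = code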
+func (self *Label) Retain() *Label { + atomic.AddInt64(&self.refs, 1) + return self +} + +// Evaluate implements the interface expr.Term. +func (self *Label) Evaluate() (int64, error) { + if self.Dest != nil { + return int64(self.Dest.pc), nil + } else { + return 0, errors.New("unresolved label: " + self.Name) + } +} + +// Addressable is a union to represent an addressable operand. +type Addressable struct { + Type AddressType + Memory MemoryAddress + Offset RelativeOffset + Reference *Label +} + +// String implements the fmt.Stringer interface. +func (self *Addressable) String() string { + switch self.Type { + case None: + return "(not addressable)" + case Memory: + return self.Memory.String() + case Offset: + return self.Offset.String() + case Reference: + return self.Reference.String() + default: + return "(invalid addressable)" + } +} + +// MemoryOperand represents a memory operand for an instruction. +type MemoryOperand struct { + refs int64 + Size int + Addr Addressable + Mask RegisterMask + Masked bool + Broadcast uint8 +} + +const ( + _Sizes = 0b10000000100010111 // bit-mask for valid sizes (0, 1, 2, 4, 8, 16) +) + +func (self *MemoryOperand) isVMX(evex bool) bool { + return self.Addr.Type == Memory && self.Addr.Memory.isVMX(evex) +} + +func (self *MemoryOperand) isVMY(evex bool) bool { + return self.Addr.Type == Memory && self.Addr.Memory.isVMY(evex) +} + +func (self *MemoryOperand) isVMZ() bool { + return self.Addr.Type == Memory && self.Addr.Memory.isVMZ() +} + +func (self *MemoryOperand) isMem() bool { + if (_Sizes & (1 << self.Broadcast)) == 0 { + return false + } else if self.Addr.Type == Memory { + return self.Addr.Memory.isMem() + } else if self.Addr.Type == Offset { + return true + } else if self.Addr.Type == Reference { + return true + } else { + return false + } +} + +func (self *MemoryOperand) isSize(n int) bool { + return self.Size == 0 || self.Size == n +} + +func (self *MemoryOperand) isBroadcast(n int, b uint8) bool { + return self.Size == n && self.Broadcast == b +} + +func (self *MemoryOperand) formatMask() string { + if !self.Masked { + return "" + } else { + return self.Mask.String() + } +} + +func (self *MemoryOperand) formatBroadcast() string { + if self.Broadcast == 0 { + return "" + } else { + return fmt.Sprintf("{1to%d}", self.Broadcast) + } +} + +func (self *MemoryOperand) ensureAddrValid() { + switch self.Addr.Type { + case None: + break + case Memory: + self.Addr.Memory.EnsureValid() + case Offset: + break + case Reference: + break + default: + panic("invalid address type") + } +} + +func (self *MemoryOperand) ensureSizeValid() { + if (_Sizes & (1 << self.Size)) == 0 { + panic("invalid memory operand size") + } +} + +func (self *MemoryOperand) ensureBroadcastValid() { + if (_Sizes & (1 << self.Broadcast)) == 0 { + panic("invalid memory operand broadcast") + } +} + +// Free decreases the reference count of a MemoryOperand, if the +// refcount drops to 0, the MemoryOperand will be recycled. +func (self *MemoryOperand) Free() { + if atomic.AddInt64(&self.refs, -1) == 0 { + //freeMemoryOperand(self) + } +} + +// String implements the fmt.Stringer interface. +func (self *MemoryOperand) String() string { + return self.Addr.String() + self.formatMask() + self.formatBroadcast() +} + +// Retain increases the reference count of a MemoryOperand. +func (self *MemoryOperand) Retain() *MemoryOperand { + atomic.AddInt64(&self.refs, 1) + return self +} + +// EnsureValid checks if the memory operand is valid, if not, it panics. 
+func (self *MemoryOperand) EnsureValid() { + self.ensureAddrValid() + self.ensureSizeValid() + self.ensureBroadcastValid() +} + +// MemoryAddress represents a memory address. +type MemoryAddress struct { + Base Register + Index Register + Scale uint8 + Displacement int32 +} + +const ( + _Scales = 0b100010111 // bit-mask for valid scales (0, 1, 2, 4, 8) +) + +func (self *MemoryAddress) isVMX(evex bool) bool { + return self.isMemBase() && (self.Index == nil || isXMM(self.Index) || (evex && isEVEXXMM(self.Index))) +} + +func (self *MemoryAddress) isVMY(evex bool) bool { + return self.isMemBase() && (self.Index == nil || isYMM(self.Index) || (evex && isEVEXYMM(self.Index))) +} + +func (self *MemoryAddress) isVMZ() bool { + return self.isMemBase() && (self.Index == nil || isZMM(self.Index)) +} + +func (self *MemoryAddress) isMem() bool { + return self.isMemBase() && (self.Index == nil || isReg64(self.Index)) +} + +func (self *MemoryAddress) isMemBase() bool { + return (self.Base == nil || isReg64(self.Base)) && // `Base` must be 64-bit if present + (self.Scale == 0) == (self.Index == nil) && // `Scale` and `Index` depend on each other + (_Scales&(1<<self.Scale)) != 0 // `Scale` can only be 0, 1, 2, 4 or 8 +} + +// String implements the fmt.Stringer interface. +func (self *MemoryAddress) String() string { + var dp int + var sb strings.Builder + + /* the displacement part */ + if dp = int(self.Displacement); dp != 0 { + sb.WriteString(strconv.Itoa(dp)) + } + +	/* the base register */ + if sb.WriteByte('('); self.Base != nil { + sb.WriteByte('%') + sb.WriteString(self.Base.String()) + } + + /* index is optional */ + if self.Index != nil { + sb.WriteString(",%") + sb.WriteString(self.Index.String()) + + /* scale is also optional */ + if self.Scale >= 2 { + sb.WriteByte(',') + sb.WriteString(strconv.Itoa(int(self.Scale))) + } + } + + /* close the bracket */ + sb.WriteByte(')') + return sb.String() +} + +// EnsureValid checks if the memory address is valid, if not, it panics. +func (self *MemoryAddress) EnsureValid() { + if !self.isMemBase() || (self.Index != nil && !isIndexable(self.Index)) { + panic("not a valid memory address") + } +} + +// Ref constructs a memory reference to a label. +func Ref(ref *Label) (v *MemoryOperand) { + v = CreateMemoryOperand() + v.Addr.Type = Reference + v.Addr.Reference = ref + return +} + +// Abs constructs a simple memory address that represents absolute addressing. +func Abs(disp int32) *MemoryOperand { + return Sib(nil, nil, 0, disp) +} + +// Ptr constructs a simple memory operand with base and displacement. +func Ptr(base Register, disp int32) *MemoryOperand { + return Sib(base, nil, 0, disp) +} + +// Sib constructs a simple memory operand that represents a complete memory address. 
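These constructors (together with Sib, next) compose as in this small self-contained sketch (an editor's illustration; the register constants come from registers.go later in this diff):

    package main

    import (
        "fmt"

        "github.com/cloudwego/iasm/x86_64"
    )

    func main() {
        m1 := x86_64.Ptr(x86_64.RAX, 16)                // renders as 16(%rax)
        m2 := x86_64.Sib(x86_64.RSP, x86_64.RCX, 4, -8) // renders as -8(%rsp,%rcx,4)
        m3 := x86_64.Ref(x86_64.CreateLabel("target"))  // label reference, resolved at assembly time
        fmt.Println(m1, m2, m3)
    }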
+func Sib(base Register, index Register, scale uint8, disp int32) (v *MemoryOperand) { + v = CreateMemoryOperand() + v.Addr.Type = Memory + v.Addr.Memory.Base = base + v.Addr.Memory.Index = index + v.Addr.Memory.Scale = scale + v.Addr.Memory.Displacement = disp + v.EnsureValid() + return +} + +/** Operand Matching Helpers **/ + +const _IntMask = (1 << reflect.Int) | + (1 << reflect.Int8) | + (1 << reflect.Int16) | + (1 << reflect.Int32) | + (1 << reflect.Int64) | + (1 << reflect.Uint) | + (1 << reflect.Uint8) | + (1 << reflect.Uint16) | + (1 << reflect.Uint32) | + (1 << reflect.Uint64) | + (1 << reflect.Uintptr) + +func isInt(k reflect.Kind) bool { + return (_IntMask & (1 << k)) != 0 +} + +func asInt64(v interface{}) (int64, bool) { + if isSpecial(v) { + return 0, false + } else if x := efaceOf(v); isInt(x.kind()) { + return x.toInt64(), true + } else { + return 0, false + } +} + +func inRange(v interface{}, low int64, high int64) bool { + x, ok := asInt64(v) + return ok && x >= low && x <= high +} + +func isSpecial(v interface{}) bool { + switch v.(type) { + case Register8: + return true + case Register16: + return true + case Register32: + return true + case Register64: + return true + case KRegister: + return true + case MMRegister: + return true + case XMMRegister: + return true + case YMMRegister: + return true + case ZMMRegister: + return true + case RelativeOffset: + return true + case RoundingControl: + return true + case ExceptionControl: + return true + default: + return false + } +} + +func isIndexable(v interface{}) bool { + return isZMM(v) || isReg64(v) || isEVEXXMM(v) || isEVEXYMM(v) +} + +func isImm4(v interface{}) bool { return inRange(v, 0, 15) } +func isImm8(v interface{}) bool { return inRange(v, math.MinInt8, math.MaxUint8) } +func isImm16(v interface{}) bool { return inRange(v, math.MinInt16, math.MaxUint16) } +func isImm32(v interface{}) bool { return inRange(v, math.MinInt32, math.MaxUint32) } +func isImm64(v interface{}) bool { _, r := asInt64(v); return r } +func isConst1(v interface{}) bool { x, r := asInt64(v); return r && x == 1 } +func isConst3(v interface{}) bool { x, r := asInt64(v); return r && x == 3 } +func isRel8(v interface{}) bool { + x, r := v.(RelativeOffset) + return r && x >= math.MinInt8 && x <= math.MaxInt8 +} +func isRel32(v interface{}) bool { _, r := v.(RelativeOffset); return r } +func isLabel(v interface{}) bool { _, r := v.(*Label); return r } +func isReg8(v interface{}) bool { _, r := v.(Register8); return r } +func isReg8REX(v interface{}) bool { + x, r := v.(Register8) + return r && (x&0x80) == 0 && x >= SPL +} +func isReg16(v interface{}) bool { _, r := v.(Register16); return r } +func isReg32(v interface{}) bool { _, r := v.(Register32); return r } +func isReg64(v interface{}) bool { _, r := v.(Register64); return r } +func isMM(v interface{}) bool { _, r := v.(MMRegister); return r } +func isXMM(v interface{}) bool { x, r := v.(XMMRegister); return r && x <= XMM15 } +func isEVEXXMM(v interface{}) bool { _, r := v.(XMMRegister); return r } +func isXMMk(v interface{}) bool { + x, r := v.(MaskedRegister) + return isXMM(v) || (r && isXMM(x.Reg) && !x.Mask.Z) +} +func isXMMkz(v interface{}) bool { + x, r := v.(MaskedRegister) + return isXMM(v) || (r && isXMM(x.Reg)) +} +func isYMM(v interface{}) bool { x, r := v.(YMMRegister); return r && x <= YMM15 } +func isEVEXYMM(v interface{}) bool { _, r := v.(YMMRegister); return r } +func isYMMk(v interface{}) bool { + x, r := v.(MaskedRegister) + return isYMM(v) || (r && isYMM(x.Reg) && !x.Mask.Z) 
+} +func isYMMkz(v interface{}) bool { + x, r := v.(MaskedRegister) + return isYMM(v) || (r && isYMM(x.Reg)) +} +func isZMM(v interface{}) bool { _, r := v.(ZMMRegister); return r } +func isZMMk(v interface{}) bool { + x, r := v.(MaskedRegister) + return isZMM(v) || (r && isZMM(x.Reg) && !x.Mask.Z) +} +func isZMMkz(v interface{}) bool { + x, r := v.(MaskedRegister) + return isZMM(v) || (r && isZMM(x.Reg)) +} +func isK(v interface{}) bool { _, r := v.(KRegister); return r } +func isKk(v interface{}) bool { + x, r := v.(MaskedRegister) + return isK(v) || (r && isK(x.Reg) && !x.Mask.Z) +} +func isM(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && x.isMem() && x.Broadcast == 0 && !x.Masked +} +func isMk(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && x.isMem() && x.Broadcast == 0 && !(x.Masked && x.Mask.Z) +} +func isMkz(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && x.isMem() && x.Broadcast == 0 +} +func isM8(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && isM(v) && x.isSize(1) +} +func isM16(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && isM(v) && x.isSize(2) +} +func isM16kz(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && isMkz(v) && x.isSize(2) +} +func isM32(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && isM(v) && x.isSize(4) +} +func isM32k(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && isMk(v) && x.isSize(4) +} +func isM32kz(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && isMkz(v) && x.isSize(4) +} +func isM64(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && isM(v) && x.isSize(8) +} +func isM64k(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && isMk(v) && x.isSize(8) +} +func isM64kz(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && isMkz(v) && x.isSize(8) +} +func isM128(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && isM(v) && x.isSize(16) +} +func isM128kz(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && isMkz(v) && x.isSize(16) +} +func isM256(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && isM(v) && x.isSize(32) +} +func isM256kz(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && isMkz(v) && x.isSize(32) +} +func isM512(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && isM(v) && x.isSize(64) +} +func isM512kz(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && isMkz(v) && x.isSize(64) +} +func isM64M32bcst(v interface{}) bool { + x, r := v.(*MemoryOperand) + return isM64(v) || (r && x.isBroadcast(4, 2)) +} +func isM128M32bcst(v interface{}) bool { + x, r := v.(*MemoryOperand) + return isM128(v) || (r && x.isBroadcast(4, 4)) +} +func isM256M32bcst(v interface{}) bool { + x, r := v.(*MemoryOperand) + return isM256(v) || (r && x.isBroadcast(4, 8)) +} +func isM512M32bcst(v interface{}) bool { + x, r := v.(*MemoryOperand) + return isM512(v) || (r && x.isBroadcast(4, 16)) +} +func isM128M64bcst(v interface{}) bool { + x, r := v.(*MemoryOperand) + return isM128(v) || (r && x.isBroadcast(8, 2)) +} +func isM256M64bcst(v interface{}) bool { + x, r := v.(*MemoryOperand) + return isM256(v) || (r && x.isBroadcast(8, 4)) +} +func isM512M64bcst(v interface{}) bool { + x, r := v.(*MemoryOperand) + return isM512(v) || (r && x.isBroadcast(8, 8)) +} +func isVMX(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && x.isVMX(false) && !x.Masked +} +func isEVEXVMX(v interface{}) bool { + x, r := 
v.(*MemoryOperand) + return r && x.isVMX(true) && !x.Masked +} +func isVMXk(v interface{}) bool { x, r := v.(*MemoryOperand); return r && x.isVMX(true) } +func isVMY(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && x.isVMY(false) && !x.Masked +} +func isEVEXVMY(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && x.isVMY(true) && !x.Masked +} +func isVMYk(v interface{}) bool { x, r := v.(*MemoryOperand); return r && x.isVMY(true) } +func isVMZ(v interface{}) bool { + x, r := v.(*MemoryOperand) + return r && x.isVMZ() && !x.Masked +} +func isVMZk(v interface{}) bool { x, r := v.(*MemoryOperand); return r && x.isVMZ() } +func isSAE(v interface{}) bool { _, r := v.(ExceptionControl); return r } +func isER(v interface{}) bool { _, r := v.(RoundingControl); return r } + +func isImmExt(v interface{}, ext int, min int64, max int64) bool { + if x, ok := asInt64(v); !ok { + return false + } else if m := int64(1) << (8 * ext); x < m && x >= m+min { + return true + } else { + return x <= max && x >= min + } +} + +func isImm8Ext(v interface{}, ext int) bool { + return isImmExt(v, ext, math.MinInt8, math.MaxInt8) +} + +func isImm32Ext(v interface{}, ext int) bool { + return isImmExt(v, ext, math.MinInt32, math.MaxInt32) +} diff --git a/vendor/github.com/cloudwego/iasm/x86_64/pools.go b/vendor/github.com/cloudwego/iasm/x86_64/pools.go new file mode 100644 index 000000000..690db5b7a --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/x86_64/pools.go @@ -0,0 +1,54 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package x86_64 + +// CreateLabel creates a new Label, it may allocate a new one or grab one from a pool. +func CreateLabel(name string) *Label { + p := new(Label) + + /* initialize the label */ + p.refs = 1 + p.Name = name + return p +} + +func newProgram(arch *Arch) *Program { + p := new(Program) + + /* initialize the program */ + p.arch = arch + return p +} + +func newInstruction(name string, argc int, argv Operands) *Instruction { + p := new(Instruction) + + /* initialize the instruction */ + p.name = name + p.argc = argc + p.argv = argv + return p +} + +// CreateMemoryOperand creates a new MemoryOperand, it may allocate a new one or grab one from a pool. +func CreateMemoryOperand() *MemoryOperand { + p := new(MemoryOperand) + + /* initialize the memory operand */ + p.refs = 1 + return p +} diff --git a/vendor/github.com/cloudwego/iasm/x86_64/program.go b/vendor/github.com/cloudwego/iasm/x86_64/program.go new file mode 100644 index 000000000..f0c9b18c6 --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/x86_64/program.go @@ -0,0 +1,584 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package x86_64 + +import ( + "fmt" + "math" + "math/bits" + + "github.com/cloudwego/iasm/expr" +) + +type ( + _PseudoType int + _InstructionEncoder func(*Program, ...interface{}) *Instruction +) + +const ( + _PseudoNop _PseudoType = iota + 1 + _PseudoByte + _PseudoWord + _PseudoLong + _PseudoQuad + _PseudoData + _PseudoAlign +) + +func (self _PseudoType) String() string { + switch self { + case _PseudoNop: + return ".nop" + case _PseudoByte: + return ".byte" + case _PseudoWord: + return ".word" + case _PseudoLong: + return ".long" + case _PseudoQuad: + return ".quad" + case _PseudoData: + return ".data" + case _PseudoAlign: + return ".align" + default: + panic("unreachable") + } +} + +type _Pseudo struct { + kind _PseudoType + data []byte + uint uint64 + expr *expr.Expr +} + +func (self *_Pseudo) free() { + if self.expr != nil { + self.expr.Free() + } +} + +func (self *_Pseudo) encode(m *[]byte, pc uintptr) int { + switch self.kind { + case _PseudoNop: + return 0 + case _PseudoByte: + self.encodeByte(m) + return 1 + case _PseudoWord: + self.encodeWord(m) + return 2 + case _PseudoLong: + self.encodeLong(m) + return 4 + case _PseudoQuad: + self.encodeQuad(m) + return 8 + case _PseudoData: + self.encodeData(m) + return len(self.data) + case _PseudoAlign: + self.encodeAlign(m, pc) + return self.alignSize(pc) + default: + panic("invalid pseudo instruction") + } +} + +func (self *_Pseudo) evalExpr(low int64, high int64) int64 { + if v, err := self.expr.Evaluate(); err != nil { + panic(err) + } else if v < low || v > high { + panic(fmt.Sprintf("expression out of range [%d, %d]: %d", low, high, v)) + } else { + return v + } +} + +func (self *_Pseudo) alignSize(pc uintptr) int { + if !ispow2(self.uint) { + panic(fmt.Sprintf("alignment should be a power of 2, not %d", self.uint)) + } else { + return align(int(pc), bits.TrailingZeros64(self.uint)) - int(pc) + } +} + +func (self *_Pseudo) encodeData(m *[]byte) { + if m != nil { + *m = append(*m, self.data...) + } +} + +func (self *_Pseudo) encodeByte(m *[]byte) { + if m != nil { + append8(m, byte(self.evalExpr(math.MinInt8, math.MaxUint8))) + } +} + +func (self *_Pseudo) encodeWord(m *[]byte) { + if m != nil { + append16(m, uint16(self.evalExpr(math.MinInt16, math.MaxUint16))) + } +} + +func (self *_Pseudo) encodeLong(m *[]byte) { + if m != nil { + append32(m, uint32(self.evalExpr(math.MinInt32, math.MaxUint32))) + } +} + +func (self *_Pseudo) encodeQuad(m *[]byte) { + if m != nil { + if v, err := self.expr.Evaluate(); err != nil { + panic(err) + } else { + append64(m, uint64(v)) + } + } +} + +func (self *_Pseudo) encodeAlign(m *[]byte, pc uintptr) { + if m != nil { + if self.expr == nil { + expandmm(m, self.alignSize(pc), 0) + } else { + expandmm(m, self.alignSize(pc), byte(self.evalExpr(math.MinInt8, math.MaxUint8))) + } + } +} + +// Operands represents a sequence of operands required by an instruction. +type Operands [_N_args]interface{} + +// InstructionDomain represents the domain of an instruction. 
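A note on the .align arithmetic in alignSize above: for a power-of-two boundary the padding count reduces to mask arithmetic, as in this sketch (an editor's illustration; the ispow2, align and expandmm helpers used above are defined elsewhere in the package):

    // Padding bytes needed to reach the next n-byte boundary (n must be a
    // power of two), matching the effect of alignSize: pc=0x1003, n=8 -> 5.
    func padTo(pc uintptr, n uintptr) int {
        return int(-pc & (n - 1))
    }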
+type InstructionDomain uint8 + +const ( + DomainGeneric InstructionDomain = iota + DomainMMXSSE + DomainAVX + DomainFMA + DomainCrypto + DomainMask + DomainAMDSpecific + DomainMisc + DomainPseudo +) + +type ( + _BranchType uint8 +) + +const ( + _B_none _BranchType = iota + _B_conditional + _B_unconditional +) + +// Instruction represents an unencoded instruction. +type Instruction struct { + next *Instruction + pc uintptr + nb int + len int + argc int + name string + argv Operands + forms [_N_forms]_Encoding + pseudo _Pseudo + branch _BranchType + domain InstructionDomain + prefix []byte +} + +func (self *Instruction) add(flags int, encoder func(m *_Encoding, v []interface{})) { + self.forms[self.len].flags = flags + self.forms[self.len].encoder = encoder + self.len++ +} + +func (self *Instruction) free() { + self.clear() + self.pseudo.free() + //freeInstruction(self) +} + +func (self *Instruction) clear() { + for i := 0; i < self.argc; i++ { + if v, ok := self.argv[i].(Disposable); ok { + v.Free() + } + } +} + +func (self *Instruction) check(e *_Encoding) bool { + if (e.flags & _F_rel1) != 0 { + return isRel8(self.argv[0]) + } else if (e.flags & _F_rel4) != 0 { + return isRel32(self.argv[0]) || isLabel(self.argv[0]) + } else { + return true + } +} + +func (self *Instruction) encode(m *[]byte) int { + n := math.MaxInt64 + p := (*_Encoding)(nil) + + /* encode prefixes if any */ + if self.nb = len(self.prefix); m != nil { + *m = append(*m, self.prefix...) + } + + /* check for pseudo-instructions */ + if self.pseudo.kind != 0 { + self.nb += self.pseudo.encode(m, self.pc) + return self.nb + } + + /* find the shortest encoding */ + for i := 0; i < self.len; i++ { + if e := &self.forms[i]; self.check(e) { + if v := e.encode(self.argv[:self.argc]); v < n { + n = v + p = e + } + } + } + + /* add to buffer if needed */ + if m != nil { + *m = append(*m, p.bytes[:n]...) + } + + /* update the instruction length */ + self.nb += n + return self.nb +} + +/** Instruction Prefixes **/ + +const ( + _P_cs = 0x2e + _P_ds = 0x3e + _P_es = 0x26 + _P_fs = 0x64 + _P_gs = 0x65 + _P_ss = 0x36 + _P_lock = 0xf0 +) + +// CS overrides the memory operation of this instruction to CS. +func (self *Instruction) CS() *Instruction { + self.prefix = append(self.prefix, _P_cs) + return self +} + +// DS overrides the memory operation of this instruction to DS, +// this is the default section for most instructions if not specified. +func (self *Instruction) DS() *Instruction { + self.prefix = append(self.prefix, _P_ds) + return self +} + +// ES overrides the memory operation of this instruction to ES. +func (self *Instruction) ES() *Instruction { + self.prefix = append(self.prefix, _P_es) + return self +} + +// FS overrides the memory operation of this instruction to FS. +func (self *Instruction) FS() *Instruction { + self.prefix = append(self.prefix, _P_fs) + return self +} + +// GS overrides the memory operation of this instruction to GS. +func (self *Instruction) GS() *Instruction { + self.prefix = append(self.prefix, _P_gs) + return self +} + +// SS overrides the memory operation of this instruction to SS. +func (self *Instruction) SS() *Instruction { + self.prefix = append(self.prefix, _P_ss) + return self +} + +// LOCK causes the processor's LOCK# signal to be asserted during execution of +// the accompanying instruction (turns the instruction into an atomic instruction). 
+// In a multiprocessor environment, the LOCK# signal ensures that the processor +// has exclusive use of any shared memory while the signal is asserted. +func (self *Instruction) LOCK() *Instruction { + self.prefix = append(self.prefix, _P_lock) + return self +} + +/** Basic Instruction Properties **/ + +// Name returns the instruction name. +func (self *Instruction) Name() string { + return self.name +} + +// Domain returns the domain of this instruction. +func (self *Instruction) Domain() InstructionDomain { + return self.domain +} + +// Operands returns the operands of this instruction. +func (self *Instruction) Operands() []interface{} { + return self.argv[:self.argc] +} + +// Program represents a sequence of instructions. +type Program struct { + arch *Arch + head *Instruction + tail *Instruction +} + +const ( + _N_near = 2 // near-branch (-128 ~ +127) takes 2 bytes to encode + _N_far_cond = 6 // conditional far-branch takes 6 bytes to encode + _N_far_uncond = 5 // unconditional far-branch takes 5 bytes to encode +) + +func (self *Program) clear() { + for p, q := self.head, self.head; p != nil; p = q { + q = p.next + p.free() + } +} + +func (self *Program) alloc(name string, argc int, argv Operands) *Instruction { + p := self.tail + q := newInstruction(name, argc, argv) + + /* attach to tail if any */ + if p != nil { + p.next = q + } else { + self.head = q + } + + /* set the new tail */ + self.tail = q + return q +} + +func (self *Program) pseudo(kind _PseudoType) (p *Instruction) { + p = self.alloc(kind.String(), 0, Operands{}) + p.domain = DomainPseudo + p.pseudo.kind = kind + return +} + +func (self *Program) require(isa ISA) { + if !self.arch.HasISA(isa) { + panic("ISA '" + isa.String() + "' was not enabled") + } +} + +func (self *Program) branchSize(p *Instruction) int { + switch p.branch { + case _B_none: + panic("p is not a branch") + case _B_conditional: + return _N_far_cond + case _B_unconditional: + return _N_far_uncond + default: + panic("invalid instruction") + } +} + +/** Pseudo-Instructions **/ + +// Byte is a pseudo-instruction to add a raw byte to the assembled code. +func (self *Program) Byte(v *expr.Expr) (p *Instruction) { + p = self.pseudo(_PseudoByte) + p.pseudo.expr = v + return +} + +// Word is a pseudo-instruction to add a raw uint16 as little-endian to the assembled code. +func (self *Program) Word(v *expr.Expr) (p *Instruction) { + p = self.pseudo(_PseudoWord) + p.pseudo.expr = v + return +} + +// Long is a pseudo-instruction to add a raw uint32 as little-endian to the assembled code. +func (self *Program) Long(v *expr.Expr) (p *Instruction) { + p = self.pseudo(_PseudoLong) + p.pseudo.expr = v + return +} + +// Quad is a pseudo-instruction to add a raw uint64 as little-endian to the assembled code. +func (self *Program) Quad(v *expr.Expr) (p *Instruction) { + p = self.pseudo(_PseudoQuad) + p.pseudo.expr = v + return +} + +// Data is a pseudo-instruction to add raw bytes to the assembled code. +func (self *Program) Data(v []byte) (p *Instruction) { + p = self.pseudo(_PseudoData) + p.pseudo.data = v + return +} + +// Align is a pseudo-instruction to ensure the PC is aligned to a certain value. +func (self *Program) Align(align uint64, padding *expr.Expr) (p *Instruction) { + p = self.pseudo(_PseudoAlign) + p.pseudo.uint = align + p.pseudo.expr = padding + return +} + +/** Program Assembler **/ + +// Free returns the Program object back into the pool. +// Any operation performed after Free is undefined behavior. 
+// +// NOTE: This also frees all the instructions, labels, memory +// operands and expressions associated with this program. +func (self *Program) Free() { + self.clear() + //freeProgram(self) +} + +// Link pins a label at the current position. +func (self *Program) Link(p *Label) { + if p.Dest != nil { + panic("label was already linked") + } else { + p.Dest = self.pseudo(_PseudoNop) + } +} + +// Assemble assembles and links the entire program into machine code. +func (self *Program) Assemble(pc uintptr) (ret []byte) { + orig := pc + next := true + offs := uintptr(0) + + /* Pass 0: PC-precompute, assume all labeled branches are far-branches. */ + for p := self.head; p != nil; p = p.next { + if p.pc = pc; !isLabel(p.argv[0]) || p.branch == _B_none { + pc += uintptr(p.encode(nil)) + } else { + pc += uintptr(self.branchSize(p)) + } + } + + /* allocate space for the machine code */ + nb := int(pc - orig) + ret = make([]byte, 0, nb) + + /* Pass 1: adjust all the jumps */ + for next { + next = false + offs = uintptr(0) + + /* scan all the branches */ + for p := self.head; p != nil; p = p.next { + var ok bool + var lb *Label + + /* re-calculate the alignment here */ + if nb = p.nb; p.pseudo.kind == _PseudoAlign { + p.pc -= offs + offs += uintptr(nb - p.encode(nil)) + continue + } + + /* adjust the program counter */ + p.pc -= offs + lb, ok = p.argv[0].(*Label) + + /* only care about labeled far-branches */ + if !ok || p.nb == _N_near || p.branch == _B_none { + continue + } + + /* calculate the jump offset */ + size := self.branchSize(p) + diff := lb.offset(p.pc, size) + + /* too far to be a near jump */ + if diff > 127 || diff < -128 { + p.nb = size + continue + } + + /* a far jump becomes a near jump, calculate + * the PC adjustment value and assemble again */ + next = true + p.nb = _N_near + offs += uintptr(size - _N_near) + } + } + + /* Pass 2: link all the cross-references */ + for p := self.head; p != nil; p = p.next { + for i := 0; i < p.argc; i++ { + var ok bool + var lb *Label + var op *MemoryOperand + + /* resolve labels */ + if lb, ok = p.argv[i].(*Label); ok { + p.argv[i] = lb.offset(p.pc, p.nb) + continue + } + + /* check for memory operands */ + if op, ok = p.argv[i].(*MemoryOperand); !ok { + continue + } + + /* check for label references */ + if op.Addr.Type != Reference { + continue + } + + /* replace the label with the real offset */ + op.Addr.Type = Offset + op.Addr.Offset = op.Addr.Reference.offset(p.pc, p.nb) + } + } + + /* Pass 3: actually encode all the instructions */ + for p := self.head; p != nil; p = p.next { + p.encode(&ret) + } + + /* all done */ + return ret +} + +// AssembleAndFree is like Assemble, but it frees the Program after assembling. +func (self *Program) AssembleAndFree(pc uintptr) (ret []byte) { + ret = self.Assemble(pc) + self.Free() + return +} diff --git a/vendor/github.com/cloudwego/iasm/x86_64/registers.go b/vendor/github.com/cloudwego/iasm/x86_64/registers.go new file mode 100644 index 000000000..574c2ee96 --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/x86_64/registers.go @@ -0,0 +1,693 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+package x86_64
+
+import (
+    `fmt`
+)
+
+// Register represents a hardware register.
+type Register interface {
+    fmt.Stringer
+    implRegister()
+}
+
+type (
+    Register8  byte
+    Register16 byte
+    Register32 byte
+    Register64 byte
+)
+
+type (
+    KRegister   byte
+    MMRegister  byte
+    XMMRegister byte
+    YMMRegister byte
+    ZMMRegister byte
+)
+
+// RegisterMask is a KRegister used to mask another register.
+type RegisterMask struct {
+    Z bool
+    K KRegister
+}
+
+// String implements the fmt.Stringer interface.
+func (self RegisterMask) String() string {
+    if !self.Z {
+        return fmt.Sprintf("{%%%s}", self.K)
+    } else {
+        return fmt.Sprintf("{%%%s}{z}", self.K)
+    }
+}
+
+// MaskedRegister is a Register masked by a RegisterMask.
+type MaskedRegister struct {
+    Reg  Register
+    Mask RegisterMask
+}
+
+// String implements the fmt.Stringer interface.
+func (self MaskedRegister) String() string {
+    return self.Reg.String() + self.Mask.String()
+}
+
+const (
+    AL Register8 = iota
+    CL
+    DL
+    BL
+    SPL
+    BPL
+    SIL
+    DIL
+    R8b
+    R9b
+    R10b
+    R11b
+    R12b
+    R13b
+    R14b
+    R15b
+)
+
+const (
+    AH = SPL | 0x80
+    CH = BPL | 0x80
+    DH = SIL | 0x80
+    BH = DIL | 0x80
+)
+
+const (
+    AX Register16 = iota
+    CX
+    DX
+    BX
+    SP
+    BP
+    SI
+    DI
+    R8w
+    R9w
+    R10w
+    R11w
+    R12w
+    R13w
+    R14w
+    R15w
+)
+
+const (
+    EAX Register32 = iota
+    ECX
+    EDX
+    EBX
+    ESP
+    EBP
+    ESI
+    EDI
+    R8d
+    R9d
+    R10d
+    R11d
+    R12d
+    R13d
+    R14d
+    R15d
+)
+
+const (
+    RAX Register64 = iota
+    RCX
+    RDX
+    RBX
+    RSP
+    RBP
+    RSI
+    RDI
+    R8
+    R9
+    R10
+    R11
+    R12
+    R13
+    R14
+    R15
+)
+
+const (
+    K0 KRegister = iota
+    K1
+    K2
+    K3
+    K4
+    K5
+    K6
+    K7
+)
+
+const (
+    MM0 MMRegister = iota
+    MM1
+    MM2
+    MM3
+    MM4
+    MM5
+    MM6
+    MM7
+)
+
+const (
+    XMM0 XMMRegister = iota
+    XMM1
+    XMM2
+    XMM3
+    XMM4
+    XMM5
+    XMM6
+    XMM7
+    XMM8
+    XMM9
+    XMM10
+    XMM11
+    XMM12
+    XMM13
+    XMM14
+    XMM15
+    XMM16
+    XMM17
+    XMM18
+    XMM19
+    XMM20
+    XMM21
+    XMM22
+    XMM23
+    XMM24
+    XMM25
+    XMM26
+    XMM27
+    XMM28
+    XMM29
+    XMM30
+    XMM31
+)
+
+const (
+    YMM0 YMMRegister = iota
+    YMM1
+    YMM2
+    YMM3
+    YMM4
+    YMM5
+    YMM6
+    YMM7
+    YMM8
+    YMM9
+    YMM10
+    YMM11
+    YMM12
+    YMM13
+    YMM14
+    YMM15
+    YMM16
+    YMM17
+    YMM18
+    YMM19
+    YMM20
+    YMM21
+    YMM22
+    YMM23
+    YMM24
+    YMM25
+    YMM26
+    YMM27
+    YMM28
+    YMM29
+    YMM30
+    YMM31
+)
+
+const (
+    ZMM0 ZMMRegister = iota
+    ZMM1
+    ZMM2
+    ZMM3
+    ZMM4
+    ZMM5
+    ZMM6
+    ZMM7
+    ZMM8
+    ZMM9
+    ZMM10
+    ZMM11
+    ZMM12
+    ZMM13
+    ZMM14
+    ZMM15
+    ZMM16
+    ZMM17
+    ZMM18
+    ZMM19
+    ZMM20
+    ZMM21
+    ZMM22
+    ZMM23
+    ZMM24
+    ZMM25
+    ZMM26
+    ZMM27
+    ZMM28
+    ZMM29
+    ZMM30
+    ZMM31
+)
+
+func (self Register8) implRegister()  {}
+func (self Register16) implRegister() {}
+func (self Register32) implRegister() {}
+func (self Register64) implRegister() {}
+
+func (self KRegister) implRegister()   {}
+func (self MMRegister) implRegister()  {}
+func (self XMMRegister) implRegister() {}
+func (self YMMRegister) implRegister() {}
+func (self ZMMRegister) implRegister() {}
+
+func (self Register8) String() string  { if int(self) >= len(r8names) { return "???" } else { return r8names[self] } }
+func (self Register16) String() string { if int(self) >= len(r16names) { return "???" } else { return r16names[self] } }
+func (self Register32) String() string { if int(self) >= len(r32names) { return "???" } else { return r32names[self] } }
+func (self Register64) String() string { if int(self) >= len(r64names) { return "???" } else { return r64names[self] } }
+
+func (self KRegister) String() string   { if int(self) >= len(knames) { return "???" } else { return knames[self] } }
+func (self MMRegister) String() string  { if int(self) >= len(mmnames) { return "???" } else { return mmnames[self] } }
+func (self XMMRegister) String() string { if int(self) >= len(xmmnames) { return "???" } else { return xmmnames[self] } }
+func (self YMMRegister) String() string { if int(self) >= len(ymmnames) { return "???" } else { return ymmnames[self] } }
+func (self ZMMRegister) String() string { if int(self) >= len(zmmnames) { return "???" } else { return zmmnames[self] } }
+
+// Registers maps register name into Register instances.
+var Registers = map[string]Register {
+    "al"    : AL,
+    "cl"    : CL,
+    "dl"    : DL,
+    "bl"    : BL,
+    "spl"   : SPL,
+    "bpl"   : BPL,
+    "sil"   : SIL,
+    "dil"   : DIL,
+    "r8b"   : R8b,
+    "r9b"   : R9b,
+    "r10b"  : R10b,
+    "r11b"  : R11b,
+    "r12b"  : R12b,
+    "r13b"  : R13b,
+    "r14b"  : R14b,
+    "r15b"  : R15b,
+    "ah"    : AH,
+    "ch"    : CH,
+    "dh"    : DH,
+    "bh"    : BH,
+    "ax"    : AX,
+    "cx"    : CX,
+    "dx"    : DX,
+    "bx"    : BX,
+    "sp"    : SP,
+    "bp"    : BP,
+    "si"    : SI,
+    "di"    : DI,
+    "r8w"   : R8w,
+    "r9w"   : R9w,
+    "r10w"  : R10w,
+    "r11w"  : R11w,
+    "r12w"  : R12w,
+    "r13w"  : R13w,
+    "r14w"  : R14w,
+    "r15w"  : R15w,
+    "eax"   : EAX,
+    "ecx"   : ECX,
+    "edx"   : EDX,
+    "ebx"   : EBX,
+    "esp"   : ESP,
+    "ebp"   : EBP,
+    "esi"   : ESI,
+    "edi"   : EDI,
+    "r8d"   : R8d,
+    "r9d"   : R9d,
+    "r10d"  : R10d,
+    "r11d"  : R11d,
+    "r12d"  : R12d,
+    "r13d"  : R13d,
+    "r14d"  : R14d,
+    "r15d"  : R15d,
+    "rax"   : RAX,
+    "rcx"   : RCX,
+    "rdx"   : RDX,
+    "rbx"   : RBX,
+    "rsp"   : RSP,
+    "rbp"   : RBP,
+    "rsi"   : RSI,
+    "rdi"   : RDI,
+    "r8"    : R8,
+    "r9"    : R9,
+    "r10"   : R10,
+    "r11"   : R11,
+    "r12"   : R12,
+    "r13"   : R13,
+    "r14"   : R14,
+    "r15"   : R15,
+    "k0"    : K0,
+    "k1"    : K1,
+    "k2"    : K2,
+    "k3"    : K3,
+    "k4"    : K4,
+    "k5"    : K5,
+    "k6"    : K6,
+    "k7"    : K7,
+    "mm0"   : MM0,
+    "mm1"   : MM1,
+    "mm2"   : MM2,
+    "mm3"   : MM3,
+    "mm4"   : MM4,
+    "mm5"   : MM5,
+    "mm6"   : MM6,
+    "mm7"   : MM7,
+    "xmm0"  : XMM0,
+    "xmm1"  : XMM1,
+    "xmm2"  : XMM2,
+    "xmm3"  : XMM3,
+    "xmm4"  : XMM4,
+    "xmm5"  : XMM5,
+    "xmm6"  : XMM6,
+    "xmm7"  : XMM7,
+    "xmm8"  : XMM8,
+    "xmm9"  : XMM9,
+    "xmm10" : XMM10,
+    "xmm11" : XMM11,
+    "xmm12" : XMM12,
+    "xmm13" : XMM13,
+    "xmm14" : XMM14,
+    "xmm15" : XMM15,
+    "xmm16" : XMM16,
+    "xmm17" : XMM17,
+    "xmm18" : XMM18,
+    "xmm19" : XMM19,
+    "xmm20" : XMM20,
+    "xmm21" : XMM21,
+    "xmm22" : XMM22,
+    "xmm23" : XMM23,
+    "xmm24" : XMM24,
+    "xmm25" : XMM25,
+    "xmm26" : XMM26,
+    "xmm27" : XMM27,
+    "xmm28" : XMM28,
+    "xmm29" : XMM29,
+    "xmm30" : XMM30,
+    "xmm31" : XMM31,
+    "ymm0"  : YMM0,
+    "ymm1"  : YMM1,
+    "ymm2"  : YMM2,
+    "ymm3"  : YMM3,
+    "ymm4"  : YMM4,
+    "ymm5"  : YMM5,
+    "ymm6"  : YMM6,
+    "ymm7"  : YMM7,
+    "ymm8"  : YMM8,
+    "ymm9"  : YMM9,
+    "ymm10" : YMM10,
+    "ymm11" : YMM11,
+    "ymm12" : YMM12,
+    "ymm13" : YMM13,
+    "ymm14" : YMM14,
+    "ymm15" : YMM15,
+    "ymm16" : YMM16,
+    "ymm17" : YMM17,
+    "ymm18" : YMM18,
+    "ymm19" : YMM19,
+    "ymm20" : YMM20,
+    "ymm21" : YMM21,
+    "ymm22" : YMM22,
+    "ymm23" : YMM23,
+    "ymm24" : YMM24,
+    "ymm25" : YMM25,
+    "ymm26" : YMM26,
+    "ymm27" : YMM27,
+    "ymm28" : YMM28,
+    "ymm29" : YMM29,
+    "ymm30" : YMM30,
+    "ymm31" : YMM31,
+    "zmm0"  : ZMM0,
+    "zmm1"  : ZMM1,
+    "zmm2"  : ZMM2,
+    "zmm3"  : ZMM3,
+    "zmm4"  : ZMM4,
+    "zmm5"  : ZMM5,
+    "zmm6"  : ZMM6,
+    "zmm7"  : ZMM7,
+    "zmm8"  : ZMM8,
+    "zmm9"  : ZMM9,
+    "zmm10" : ZMM10,
+    "zmm11" : ZMM11,
+    "zmm12" : ZMM12,
+    "zmm13" : ZMM13,
+    "zmm14" : ZMM14,
+    "zmm15" : ZMM15,
+    "zmm16" : ZMM16,
+    "zmm17" : ZMM17,
+    "zmm18" : ZMM18,
+    "zmm19" : ZMM19,
+    "zmm20" : ZMM20,
+    "zmm21" : ZMM21,
+    "zmm22" : ZMM22,
+    "zmm23" : ZMM23,
+    "zmm24" : ZMM24,
+    "zmm25" : ZMM25,
+    "zmm26" : ZMM26,
+    "zmm27" : ZMM27,
+    "zmm28" : ZMM28,
+    "zmm29" : ZMM29,
+    "zmm30" : ZMM30,
+    "zmm31" : ZMM31,
+}
+
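+// Registers resolves a register from its lower-case textual name, which is
+// handy when parsing assembly text. An illustrative sketch (the lookup
+// simply yields ok == false for names that are not in the table):
+//
+//     if r, ok := Registers["rax"]; ok {
+//         println(r.String())   // "rax"; r holds the Register64 constant RAX
+//     }
+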
+/** Register Name Tables **/
+
+var r8names = [...]string {
+    AL   : "al",
+    CL   : "cl",
+    DL   : "dl",
+    BL   : "bl",
+    SPL  : "spl",
+    BPL  : "bpl",
+    SIL  : "sil",
+    DIL  : "dil",
+    R8b  : "r8b",
+    R9b  : "r9b",
+    R10b : "r10b",
+    R11b : "r11b",
+    R12b : "r12b",
+    R13b : "r13b",
+    R14b : "r14b",
+    R15b : "r15b",
+    AH   : "ah",
+    CH   : "ch",
+    DH   : "dh",
+    BH   : "bh",
+}
+
+var r16names = [...]string {
+    AX   : "ax",
+    CX   : "cx",
+    DX   : "dx",
+    BX   : "bx",
+    SP   : "sp",
+    BP   : "bp",
+    SI   : "si",
+    DI   : "di",
+    R8w  : "r8w",
+    R9w  : "r9w",
+    R10w : "r10w",
+    R11w : "r11w",
+    R12w : "r12w",
+    R13w : "r13w",
+    R14w : "r14w",
+    R15w : "r15w",
+}
+
+var r32names = [...]string {
+    EAX  : "eax",
+    ECX  : "ecx",
+    EDX  : "edx",
+    EBX  : "ebx",
+    ESP  : "esp",
+    EBP  : "ebp",
+    ESI  : "esi",
+    EDI  : "edi",
+    R8d  : "r8d",
+    R9d  : "r9d",
+    R10d : "r10d",
+    R11d : "r11d",
+    R12d : "r12d",
+    R13d : "r13d",
+    R14d : "r14d",
+    R15d : "r15d",
+}
+
+var r64names = [...]string {
+    RAX : "rax",
+    RCX : "rcx",
+    RDX : "rdx",
+    RBX : "rbx",
+    RSP : "rsp",
+    RBP : "rbp",
+    RSI : "rsi",
+    RDI : "rdi",
+    R8  : "r8",
+    R9  : "r9",
+    R10 : "r10",
+    R11 : "r11",
+    R12 : "r12",
+    R13 : "r13",
+    R14 : "r14",
+    R15 : "r15",
+}
+
+var knames = [...]string {
+    K0: "k0",
+    K1: "k1",
+    K2: "k2",
+    K3: "k3",
+    K4: "k4",
+    K5: "k5",
+    K6: "k6",
+    K7: "k7",
+}
+
+var mmnames = [...]string {
+    MM0: "mm0",
+    MM1: "mm1",
+    MM2: "mm2",
+    MM3: "mm3",
+    MM4: "mm4",
+    MM5: "mm5",
+    MM6: "mm6",
+    MM7: "mm7",
+}
+
+var xmmnames = [...]string {
+    XMM0  : "xmm0",
+    XMM1  : "xmm1",
+    XMM2  : "xmm2",
+    XMM3  : "xmm3",
+    XMM4  : "xmm4",
+    XMM5  : "xmm5",
+    XMM6  : "xmm6",
+    XMM7  : "xmm7",
+    XMM8  : "xmm8",
+    XMM9  : "xmm9",
+    XMM10 : "xmm10",
+    XMM11 : "xmm11",
+    XMM12 : "xmm12",
+    XMM13 : "xmm13",
+    XMM14 : "xmm14",
+    XMM15 : "xmm15",
+    XMM16 : "xmm16",
+    XMM17 : "xmm17",
+    XMM18 : "xmm18",
+    XMM19 : "xmm19",
+    XMM20 : "xmm20",
+    XMM21 : "xmm21",
+    XMM22 : "xmm22",
+    XMM23 : "xmm23",
+    XMM24 : "xmm24",
+    XMM25 : "xmm25",
+    XMM26 : "xmm26",
+    XMM27 : "xmm27",
+    XMM28 : "xmm28",
+    XMM29 : "xmm29",
+    XMM30 : "xmm30",
+    XMM31 : "xmm31",
+}
+
+var ymmnames = [...]string {
+    YMM0  : "ymm0",
+    YMM1  : "ymm1",
+    YMM2  : "ymm2",
+    YMM3  : "ymm3",
+    YMM4  : "ymm4",
+    YMM5  : "ymm5",
+    YMM6  : "ymm6",
+    YMM7  : "ymm7",
+    YMM8  : "ymm8",
+    YMM9  : "ymm9",
+    YMM10 : "ymm10",
+    YMM11 : "ymm11",
+    YMM12 : "ymm12",
+    YMM13 : "ymm13",
+    YMM14 : "ymm14",
+    YMM15 : "ymm15",
+    YMM16 : "ymm16",
+    YMM17 : "ymm17",
+    YMM18 : "ymm18",
+    YMM19 : "ymm19",
+    YMM20 : "ymm20",
+    YMM21 : "ymm21",
+    YMM22 : "ymm22",
+    YMM23 : "ymm23",
+    YMM24 : "ymm24",
+    YMM25 : "ymm25",
+    YMM26 : "ymm26",
+    YMM27 : "ymm27",
+    YMM28 : "ymm28",
+    YMM29 : "ymm29",
+    YMM30 : "ymm30",
+    YMM31 : "ymm31",
+}
+
+var zmmnames = [...]string {
+    ZMM0  : "zmm0",
+    ZMM1  : "zmm1",
+    ZMM2  : "zmm2",
+    ZMM3  : "zmm3",
+    ZMM4  : "zmm4",
+    ZMM5  : "zmm5",
+    ZMM6  : "zmm6",
+    ZMM7  : "zmm7",
+    ZMM8  : "zmm8",
+    ZMM9  : "zmm9",
+    ZMM10 : "zmm10",
+    ZMM11 : "zmm11",
+    ZMM12 : "zmm12",
+    ZMM13 : "zmm13",
+    ZMM14 : "zmm14",
+    ZMM15 : "zmm15",
+    ZMM16 : "zmm16",
+    ZMM17 : "zmm17",
+    ZMM18 : "zmm18",
+    ZMM19 : "zmm19",
+    ZMM20 : "zmm20",
+    ZMM21 : "zmm21",
+    ZMM22 : "zmm22",
+    ZMM23 : "zmm23",
+    ZMM24 : "zmm24",
+    ZMM25 : "zmm25",
+    ZMM26 : "zmm26",
+    ZMM27 : "zmm27",
+    ZMM28 : "zmm28",
+    ZMM29 : "zmm29",
+    ZMM30 : "zmm30",
+    ZMM31 : "zmm31",
+}
diff --git a/vendor/github.com/cloudwego/iasm/x86_64/utils.go b/vendor/github.com/cloudwego/iasm/x86_64/utils.go
new file mode 100644
index 000000000..56c107ff5
--- /dev/null
+++ b/vendor/github.com/cloudwego/iasm/x86_64/utils.go
@@ -0,0 +1,147 @@
+//
+// Copyright 2024 CloudWeGo Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+package x86_64
+
+import (
+    `encoding/binary`
+    `errors`
+    `reflect`
+    `strconv`
+    `unicode/utf8`
+    `unsafe`
+)
+
+const (
+    _CC_digit = 1 << iota
+    _CC_ident
+    _CC_ident0
+    _CC_number
+)
+
+// ispow2 reports whether v is a power of two; note that it also
+// returns true for zero.
+func ispow2(v uint64) bool {
+    return (v & (v - 1)) == 0
+}
+
+func isdigit(cc rune) bool {
+    return '0' <= cc && cc <= '9'
+}
+
+func isalpha(cc rune) bool {
+    return (cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')
+}
+
+func isident(cc rune) bool {
+    return cc == '_' || isalpha(cc) || isdigit(cc)
+}
+
+func isident0(cc rune) bool {
+    return cc == '_' || isalpha(cc)
+}
+
+func isnumber(cc rune) bool {
+    return (cc == 'b' || cc == 'B') ||
+        (cc == 'o' || cc == 'O') ||
+        (cc == 'x' || cc == 'X') ||
+        (cc >= '0' && cc <= '9') ||
+        (cc >= 'a' && cc <= 'f') ||
+        (cc >= 'A' && cc <= 'F')
+}
+
+// align rounds v up to the nearest multiple of 1 << n.
+func align(v int, n int) int {
+    return (((v - 1) >> n) + 1) << n
+}
+
+func append8(m *[]byte, v byte) {
+    *m = append(*m, v)
+}
+
+func append16(m *[]byte, v uint16) {
+    p := len(*m)
+    *m = append(*m, 0, 0)
+    binary.LittleEndian.PutUint16((*m)[p:], v)
+}
+
+func append32(m *[]byte, v uint32) {
+    p := len(*m)
+    *m = append(*m, 0, 0, 0, 0)
+    binary.LittleEndian.PutUint32((*m)[p:], v)
+}
+
+func append64(m *[]byte, v uint64) {
+    p := len(*m)
+    *m = append(*m, 0, 0, 0, 0, 0, 0, 0, 0)
+    binary.LittleEndian.PutUint64((*m)[p:], v)
+}
+
+// expandmm appends n copies of the byte v to *m, growing the slice
+// in place via the runtime when needed.
+func expandmm(m *[]byte, n int, v byte) {
+    sl := (*_GoSlice)(unsafe.Pointer(m))
+    nb := sl.len + n
+
+    /* grow as needed */
+    if nb > cap(*m) {
+        *m = growslice(byteType, *m, nb)
+    }
+
+    /* fill the new area */
+    memset(unsafe.Pointer(uintptr(sl.ptr) + uintptr(sl.len)), v, uintptr(n))
+    sl.len = nb
+}
+
+// memset fills n bytes at p with the byte c.
+func memset(p unsafe.Pointer, c byte, n uintptr) {
+    if c != 0 {
+        memsetv(p, c, n)
+    } else {
+        memclrNoHeapPointers(p, n)
+    }
+}
+
+func memsetv(p unsafe.Pointer, c byte, n uintptr) {
+    for i := uintptr(0); i < n; i++ {
+        *(*byte)(unsafe.Pointer(uintptr(p) + i)) = c
+    }
+}
+
+// literal64 decodes the body of a single-quoted, multi-character constant
+// (escape sequences included) into a uint64 in native byte order; constants
+// longer than 8 encoded bytes are rejected with an error.
+func literal64(v string) (uint64, error) {
+    var nb int
+    var ch rune
+    var ex error
+    var mm [12]byte
+
+    /* unquote the runes */
+    for v != "" {
+        if ch, _, v, ex = strconv.UnquoteChar(v, '\''); ex != nil {
+            return 0, ex
+        } else if nb += utf8.EncodeRune(mm[nb:], ch); nb > 8 {
+            return 0, errors.New("multi-char constant too large")
+        }
+    }
+
+    /* convert to uint64 */
+    return *(*uint64)(unsafe.Pointer(&mm)), nil
+}
+
+var (
+    byteWrap = reflect.TypeOf(byte(0))
+    byteType = (*_GoType)(efaceOf(byteWrap).ptr)
+)
+
+//go:linkname growslice runtime.growslice
+func growslice(_ *_GoType, _ []byte, _ int) []byte
+
+//go:noescape
+//go:linkname memclrNoHeapPointers runtime.memclrNoHeapPointers
+func memclrNoHeapPointers(_ unsafe.Pointer, _ uintptr)
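+
+// Behavioral notes on the helpers above, as an illustrative sketch (the
+// results follow directly from the implementations; nothing here is part
+// of the package API surface):
+//
+//     align(5, 2)        // == 8: rounds 5 up to a multiple of 1 << 2
+//     align(8, 2)        // == 8: already-aligned values are unchanged
+//     literal64("ab")    // == 0x6261: 'a' lands in the low byte, 'b' in the
+//                        //    next one (native byte order, so little-endian
+//                        //    on x86-64)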
