Diffstat (limited to 'vendor/github.com/tetratelabs/wazero/internal/engine')
91 files changed, 0 insertions, 51151 deletions
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go
deleted file mode 100644
index 4e20e4b2c..000000000
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go
+++ /dev/null
@@ -1,3632 +0,0 @@
-package interpreter
-
-import (
-	"bytes"
-	"encoding/binary"
-	"fmt"
-	"math"
-	"strings"
-
-	"github.com/tetratelabs/wazero/api"
-	"github.com/tetratelabs/wazero/internal/leb128"
-	"github.com/tetratelabs/wazero/internal/wasm"
-)
-
-type controlFrameKind byte
-
-const (
-	controlFrameKindBlockWithContinuationLabel controlFrameKind = iota
-	controlFrameKindBlockWithoutContinuationLabel
-	controlFrameKindFunction
-	controlFrameKindLoop
-	controlFrameKindIfWithElse
-	controlFrameKindIfWithoutElse
-)
-
-type (
-	controlFrame struct {
-		frameID uint32
-		// originalStackLenWithoutParam holds the number of values on the stack
-		// when Start executing this control frame minus params for the block.
-		originalStackLenWithoutParam int
-		// originalStackLenWithoutParamUint64 is almost the same as originalStackLenWithoutParam
-		// except that it holds the number of values on the stack in uint64.
-		originalStackLenWithoutParamUint64 int
-		blockType                          *wasm.FunctionType
-		kind                               controlFrameKind
-	}
-	controlFrames struct{ frames []controlFrame }
-)
-
-func (c *controlFrame) ensureContinuation() {
-	// Make sure that if the frame is block and doesn't have continuation,
-	// change the Kind so we can emit the continuation block
-	// later when we reach the End instruction of this frame.
-	if c.kind == controlFrameKindBlockWithoutContinuationLabel {
-		c.kind = controlFrameKindBlockWithContinuationLabel
-	}
-}
-
-func (c *controlFrame) asLabel() label {
-	switch c.kind {
-	case controlFrameKindBlockWithContinuationLabel,
-		controlFrameKindBlockWithoutContinuationLabel:
-		return newLabel(labelKindContinuation, c.frameID)
-	case controlFrameKindLoop:
-		return newLabel(labelKindHeader, c.frameID)
-	case controlFrameKindFunction:
-		return newLabel(labelKindReturn, 0)
-	case controlFrameKindIfWithElse,
-		controlFrameKindIfWithoutElse:
-		return newLabel(labelKindContinuation, c.frameID)
-	}
-	panic(fmt.Sprintf("unreachable: a bug in interpreterir implementation: %v", c.kind))
-}
-
-func (c *controlFrames) functionFrame() *controlFrame {
-	// No need to check stack bound
-	// as we can assume that all the operations
-	// are valid thanks to validateFunction
-	// at module validation phase.
-	return &c.frames[0]
-}
-
-func (c *controlFrames) get(n int) *controlFrame {
-	// No need to check stack bound
-	// as we can assume that all the operations
-	// are valid thanks to validateFunction
-	// at module validation phase.
-	return &c.frames[len(c.frames)-n-1]
-}
-
-func (c *controlFrames) top() *controlFrame {
-	// No need to check stack bound
-	// as we can assume that all the operations
-	// are valid thanks to validateFunction
-	// at module validation phase.
-	return &c.frames[len(c.frames)-1]
-}
-
-func (c *controlFrames) empty() bool {
-	return len(c.frames) == 0
-}
-
-func (c *controlFrames) pop() (frame *controlFrame) {
-	// No need to check stack bound
-	// as we can assume that all the operations
-	// are valid thanks to validateFunction
-	// at module validation phase.
-	frame = c.top()
-	c.frames = c.frames[:len(c.frames)-1]
-	return
-}
-
-func (c *controlFrames) push(frame controlFrame) {
-	c.frames = append(c.frames, frame)
-}
-
-func (c *compiler) initializeStack() {
-	// Reuse the existing slice.
-	c.localIndexToStackHeightInUint64 = c.localIndexToStackHeightInUint64[:0]
-	var current int
-	for _, lt := range c.sig.Params {
-		c.localIndexToStackHeightInUint64 = append(c.localIndexToStackHeightInUint64, current)
-		if lt == wasm.ValueTypeV128 {
-			current++
-		}
-		current++
-	}
-
-	if c.callFrameStackSizeInUint64 > 0 {
-		// We reserve the stack slots for result values below the return call frame slots.
-		if diff := c.sig.ResultNumInUint64 - c.sig.ParamNumInUint64; diff > 0 {
-			current += diff
-		}
-	}
-
-	// Non-func param locals Start after the return call frame.
-	current += c.callFrameStackSizeInUint64
-
-	for _, lt := range c.localTypes {
-		c.localIndexToStackHeightInUint64 = append(c.localIndexToStackHeightInUint64, current)
-		if lt == wasm.ValueTypeV128 {
-			current++
-		}
-		current++
-	}
-
-	// Push function arguments.
-	for _, t := range c.sig.Params {
-		c.stackPush(wasmValueTypeTounsignedType(t))
-	}
-
-	if c.callFrameStackSizeInUint64 > 0 {
-		// Reserve the stack slots for results.
-		for i := 0; i < c.sig.ResultNumInUint64-c.sig.ParamNumInUint64; i++ {
-			c.stackPush(unsignedTypeI64)
-		}
-
-		// Reserve the stack slots for call frame.
-		for i := 0; i < c.callFrameStackSizeInUint64; i++ {
-			c.stackPush(unsignedTypeI64)
-		}
-	}
-}
-
-// compiler is in charge of lowering raw Wasm function body to get compilationResult.
-// This is created per *wasm.Module and reused for all functions in it to reduce memory allocations.
-type compiler struct {
-	module                     *wasm.Module
-	enabledFeatures            api.CoreFeatures
-	callFrameStackSizeInUint64 int
-	stack                      []unsignedType
-	// stackLenInUint64 is the length of the stack in uint64.
-	stackLenInUint64 int
-	currentFrameID   uint32
-	controlFrames    controlFrames
-	unreachableState struct {
-		on    bool
-		depth int
-	}
-	pc, currentOpPC uint64
-	result          compilationResult
-
-	// body holds the code for the function's body where Wasm instructions are stored.
-	body []byte
-	// sig is the function type of the target function.
-	sig *wasm.FunctionType
-	// localTypes holds the target function locals' value types except function params.
-	localTypes []wasm.ValueType
-	// localIndexToStackHeightInUint64 maps the local index (starting with function params) to the stack height
-	// where the local is places. This is the necessary mapping for functions who contain vector type locals.
-	localIndexToStackHeightInUint64 []int
-
-	// types hold all the function types in the module where the targe function exists.
-	types []wasm.FunctionType
-	// funcs holds the type indexes for all declared functions in the module where the target function exists.
-	funcs []uint32
-	// globals holds the global types for all declared globals in the module where the target function exists.
-	globals []wasm.GlobalType
-
-	// needSourceOffset is true if this module requires DWARF based stack trace.
-	needSourceOffset bool
-	// bodyOffsetInCodeSection is the offset of the body of this function in the original Wasm binary's code section.
-	bodyOffsetInCodeSection uint64
-
-	ensureTermination bool
-	// Pre-allocated bytes.Reader to be used in various places.
-	br             *bytes.Reader
-	funcTypeToSigs funcTypeToIRSignatures
-
-	next int
-}
-
-//lint:ignore U1000 for debugging only.
-func (c *compiler) stackDump() string { - strs := make([]string, 0, len(c.stack)) - for _, s := range c.stack { - strs = append(strs, s.String()) - } - return "[" + strings.Join(strs, ", ") + "]" -} - -func (c *compiler) markUnreachable() { - c.unreachableState.on = true -} - -func (c *compiler) resetUnreachable() { - c.unreachableState.on = false -} - -// memoryType is the type of memory in a compiled module. -type memoryType byte - -const ( - // memoryTypeNone indicates there is no memory. - memoryTypeNone memoryType = iota - // memoryTypeStandard indicates there is a non-shared memory. - memoryTypeStandard - // memoryTypeShared indicates there is a shared memory. - memoryTypeShared -) - -type compilationResult struct { - // Operations holds interpreterir operations compiled from Wasm instructions in a Wasm function. - Operations []unionOperation - - // IROperationSourceOffsetsInWasmBinary is index-correlated with Operation and maps each operation to the corresponding source instruction's - // offset in the original WebAssembly binary. - // Non nil only when the given Wasm module has the DWARF section. - IROperationSourceOffsetsInWasmBinary []uint64 - - // LabelCallers maps label to the number of callers to that label. - // Here "callers" means that the call-sites which jumps to the label with br, br_if or br_table - // instructions. - // - // Note: zero possible and allowed in wasm. e.g. - // - // (block - // (br 0) - // (block i32.const 1111) - // ) - // - // This example the label corresponding to `(block i32.const 1111)` is never be reached at runtime because `br 0` exits the function before we reach there - LabelCallers map[label]uint32 - // UsesMemory is true if this function might use memory. - UsesMemory bool - - // The following fields are per-module values, not per-function. - - // Globals holds all the declarations of globals in the module from which this function is compiled. - Globals []wasm.GlobalType - // Functions holds all the declarations of function in the module from which this function is compiled, including itself. - Functions []wasm.Index - // Types holds all the types in the module from which this function is compiled. - Types []wasm.FunctionType - // Memory indicates the type of memory of the module. - Memory memoryType - // HasTable is true if the module from which this function is compiled has table declaration. - HasTable bool - // HasDataInstances is true if the module has data instances which might be used by memory.init or data.drop instructions. - HasDataInstances bool - // HasDataInstances is true if the module has element instances which might be used by table.init or elem.drop instructions. - HasElementInstances bool -} - -// newCompiler returns the new *compiler for the given parameters. -// Use compiler.Next function to get compilation result per function. 
-func newCompiler(enabledFeatures api.CoreFeatures, callFrameStackSizeInUint64 int, module *wasm.Module, ensureTermination bool) (*compiler, error) { - functions, globals, mem, tables, err := module.AllDeclarations() - if err != nil { - return nil, err - } - - hasTable, hasDataInstances, hasElementInstances := len(tables) > 0, - len(module.DataSection) > 0, len(module.ElementSection) > 0 - - var mt memoryType - switch { - case mem == nil: - mt = memoryTypeNone - case mem.IsShared: - mt = memoryTypeShared - default: - mt = memoryTypeStandard - } - - types := module.TypeSection - - c := &compiler{ - module: module, - enabledFeatures: enabledFeatures, - controlFrames: controlFrames{}, - callFrameStackSizeInUint64: callFrameStackSizeInUint64, - result: compilationResult{ - Globals: globals, - Functions: functions, - Types: types, - Memory: mt, - HasTable: hasTable, - HasDataInstances: hasDataInstances, - HasElementInstances: hasElementInstances, - LabelCallers: map[label]uint32{}, - }, - globals: globals, - funcs: functions, - types: types, - ensureTermination: ensureTermination, - br: bytes.NewReader(nil), - funcTypeToSigs: funcTypeToIRSignatures{ - indirectCalls: make([]*signature, len(types)), - directCalls: make([]*signature, len(types)), - wasmTypes: types, - }, - needSourceOffset: module.DWARFLines != nil, - } - return c, nil -} - -// Next returns the next compilationResult for this compiler. -func (c *compiler) Next() (*compilationResult, error) { - funcIndex := c.next - code := &c.module.CodeSection[funcIndex] - sig := &c.types[c.module.FunctionSection[funcIndex]] - - // Reset the previous result. - c.result.Operations = c.result.Operations[:0] - c.result.IROperationSourceOffsetsInWasmBinary = c.result.IROperationSourceOffsetsInWasmBinary[:0] - c.result.UsesMemory = false - // Clears the existing entries in LabelCallers. - for frameID := uint32(0); frameID <= c.currentFrameID; frameID++ { - for k := labelKind(0); k < labelKindNum; k++ { - delete(c.result.LabelCallers, newLabel(k, frameID)) - } - } - // Reset the previous states. - c.pc = 0 - c.currentOpPC = 0 - c.currentFrameID = 0 - c.stackLenInUint64 = 0 - c.unreachableState.on, c.unreachableState.depth = false, 0 - - if err := c.compile(sig, code.Body, code.LocalTypes, code.BodyOffsetInCodeSection); err != nil { - return nil, err - } - c.next++ - return &c.result, nil -} - -// Compile lowers given function instance into interpreterir operations -// so that the resulting operations can be consumed by the interpreter -// or the compiler compilation engine. -func (c *compiler) compile(sig *wasm.FunctionType, body []byte, localTypes []wasm.ValueType, bodyOffsetInCodeSection uint64) error { - // Set function specific fields. - c.body = body - c.localTypes = localTypes - c.sig = sig - c.bodyOffsetInCodeSection = bodyOffsetInCodeSection - - // Reuses the underlying slices. - c.stack = c.stack[:0] - c.controlFrames.frames = c.controlFrames.frames[:0] - - c.initializeStack() - - // Emit const expressions for locals. - // Note that here we don't take function arguments - // into account, meaning that callers must push - // arguments before entering into the function body. - for _, t := range c.localTypes { - c.emitDefaultValue(t) - } - - // Insert the function control frame. - c.controlFrames.push(controlFrame{ - frameID: c.nextFrameID(), - blockType: c.sig, - kind: controlFrameKindFunction, - }) - - // Now, enter the function body. 
- for !c.controlFrames.empty() && c.pc < uint64(len(c.body)) { - if err := c.handleInstruction(); err != nil { - return fmt.Errorf("handling instruction: %w", err) - } - } - return nil -} - -// Translate the current Wasm instruction to interpreterir's operations, -// and emit the results into c.results. -func (c *compiler) handleInstruction() error { - op := c.body[c.pc] - c.currentOpPC = c.pc - if false { - var instName string - if op == wasm.OpcodeVecPrefix { - instName = wasm.VectorInstructionName(c.body[c.pc+1]) - } else if op == wasm.OpcodeAtomicPrefix { - instName = wasm.AtomicInstructionName(c.body[c.pc+1]) - } else if op == wasm.OpcodeMiscPrefix { - instName = wasm.MiscInstructionName(c.body[c.pc+1]) - } else { - instName = wasm.InstructionName(op) - } - fmt.Printf("handling %s, unreachable_state(on=%v,depth=%d), stack=%v\n", - instName, c.unreachableState.on, c.unreachableState.depth, c.stack, - ) - } - - var peekValueType unsignedType - if len(c.stack) > 0 { - peekValueType = c.stackPeek() - } - - // Modify the stack according the current instruction. - // Note that some instructions will read "index" in - // applyToStack and advance c.pc inside the function. - index, err := c.applyToStack(op) - if err != nil { - return fmt.Errorf("apply stack failed for %s: %w", wasm.InstructionName(op), err) - } - // Now we handle each instruction, and - // emit the corresponding interpreterir operations to the results. -operatorSwitch: - switch op { - case wasm.OpcodeUnreachable: - c.emit(newOperationUnreachable()) - c.markUnreachable() - case wasm.OpcodeNop: - // Nop is noop! - case wasm.OpcodeBlock: - c.br.Reset(c.body[c.pc+1:]) - bt, num, err := wasm.DecodeBlockType(c.types, c.br, c.enabledFeatures) - if err != nil { - return fmt.Errorf("reading block type for block instruction: %w", err) - } - c.pc += num - - if c.unreachableState.on { - // If it is currently in unreachable, - // just remove the entire block. - c.unreachableState.depth++ - break operatorSwitch - } - - // Create a new frame -- entering this block. - frame := controlFrame{ - frameID: c.nextFrameID(), - originalStackLenWithoutParam: len(c.stack) - len(bt.Params), - originalStackLenWithoutParamUint64: c.stackLenInUint64 - bt.ParamNumInUint64, - kind: controlFrameKindBlockWithoutContinuationLabel, - blockType: bt, - } - c.controlFrames.push(frame) - - case wasm.OpcodeLoop: - c.br.Reset(c.body[c.pc+1:]) - bt, num, err := wasm.DecodeBlockType(c.types, c.br, c.enabledFeatures) - if err != nil { - return fmt.Errorf("reading block type for loop instruction: %w", err) - } - c.pc += num - - if c.unreachableState.on { - // If it is currently in unreachable, - // just remove the entire block. - c.unreachableState.depth++ - break operatorSwitch - } - - // Create a new frame -- entering loop. - frame := controlFrame{ - frameID: c.nextFrameID(), - originalStackLenWithoutParam: len(c.stack) - len(bt.Params), - originalStackLenWithoutParamUint64: c.stackLenInUint64 - bt.ParamNumInUint64, - kind: controlFrameKindLoop, - blockType: bt, - } - c.controlFrames.push(frame) - - // Prep labels for inside and the continuation of this loop. - loopLabel := newLabel(labelKindHeader, frame.frameID) - c.result.LabelCallers[loopLabel]++ - - // Emit the branch operation to enter inside the loop. - c.emit(newOperationBr(loopLabel)) - c.emit(newOperationLabel(loopLabel)) - - // Insert the exit code check on the loop header, which is the only necessary point in the function body - // to prevent infinite loop. 
- // - // Note that this is a little aggressive: this checks the exit code regardless the loop header is actually - // the loop. In other words, this checks even when no br/br_if/br_table instructions jumping to this loop - // exist. However, in reality, that shouldn't be an issue since such "noop" loop header will highly likely be - // optimized out by almost all guest language compilers which have the control flow optimization passes. - if c.ensureTermination { - c.emit(newOperationBuiltinFunctionCheckExitCode()) - } - case wasm.OpcodeIf: - c.br.Reset(c.body[c.pc+1:]) - bt, num, err := wasm.DecodeBlockType(c.types, c.br, c.enabledFeatures) - if err != nil { - return fmt.Errorf("reading block type for if instruction: %w", err) - } - c.pc += num - - if c.unreachableState.on { - // If it is currently in unreachable, - // just remove the entire block. - c.unreachableState.depth++ - break operatorSwitch - } - - // Create a new frame -- entering if. - frame := controlFrame{ - frameID: c.nextFrameID(), - originalStackLenWithoutParam: len(c.stack) - len(bt.Params), - originalStackLenWithoutParamUint64: c.stackLenInUint64 - bt.ParamNumInUint64, - // Note this will be set to controlFrameKindIfWithElse - // when else opcode found later. - kind: controlFrameKindIfWithoutElse, - blockType: bt, - } - c.controlFrames.push(frame) - - // Prep labels for if and else of this if. - thenLabel := newLabel(labelKindHeader, frame.frameID) - elseLabel := newLabel(labelKindElse, frame.frameID) - c.result.LabelCallers[thenLabel]++ - c.result.LabelCallers[elseLabel]++ - - // Emit the branch operation to enter the then block. - c.emit(newOperationBrIf(thenLabel, elseLabel, nopinclusiveRange)) - c.emit(newOperationLabel(thenLabel)) - case wasm.OpcodeElse: - frame := c.controlFrames.top() - if c.unreachableState.on && c.unreachableState.depth > 0 { - // If it is currently in unreachable, and the nested if, - // just remove the entire else block. - break operatorSwitch - } else if c.unreachableState.on { - // If it is currently in unreachable, and the non-nested if, - // reset the stack so we can correctly handle the else block. - top := c.controlFrames.top() - c.stackSwitchAt(top) - top.kind = controlFrameKindIfWithElse - - // Re-push the parameters to the if block so that else block can use them. - for _, t := range frame.blockType.Params { - c.stackPush(wasmValueTypeTounsignedType(t)) - } - - // We are no longer unreachable in else frame, - // so emit the correct label, and reset the unreachable state. - elseLabel := newLabel(labelKindElse, frame.frameID) - c.resetUnreachable() - c.emit( - newOperationLabel(elseLabel), - ) - break operatorSwitch - } - - // Change the Kind of this If block, indicating that - // the if has else block. - frame.kind = controlFrameKindIfWithElse - - // We need to reset the stack so that - // the values pushed inside the then block - // do not affect the else block. - dropOp := newOperationDrop(c.getFrameDropRange(frame, false)) - - // Reset the stack manipulated by the then block, and re-push the block param types to the stack. - - c.stackSwitchAt(frame) - for _, t := range frame.blockType.Params { - c.stackPush(wasmValueTypeTounsignedType(t)) - } - - // Prep labels for else and the continuation of this if block. - elseLabel := newLabel(labelKindElse, frame.frameID) - continuationLabel := newLabel(labelKindContinuation, frame.frameID) - c.result.LabelCallers[continuationLabel]++ - - // Emit the instructions for exiting the if loop, - // and then the initiation of else block. 
- c.emit(dropOp) - // Jump to the continuation of this block. - c.emit(newOperationBr(continuationLabel)) - // Initiate the else block. - c.emit(newOperationLabel(elseLabel)) - case wasm.OpcodeEnd: - if c.unreachableState.on && c.unreachableState.depth > 0 { - c.unreachableState.depth-- - break operatorSwitch - } else if c.unreachableState.on { - c.resetUnreachable() - - frame := c.controlFrames.pop() - if c.controlFrames.empty() { - return nil - } - - c.stackSwitchAt(frame) - for _, t := range frame.blockType.Results { - c.stackPush(wasmValueTypeTounsignedType(t)) - } - - continuationLabel := newLabel(labelKindContinuation, frame.frameID) - if frame.kind == controlFrameKindIfWithoutElse { - // Emit the else label. - elseLabel := newLabel(labelKindElse, frame.frameID) - c.result.LabelCallers[continuationLabel]++ - c.emit(newOperationLabel(elseLabel)) - c.emit(newOperationBr(continuationLabel)) - c.emit(newOperationLabel(continuationLabel)) - } else { - c.emit( - newOperationLabel(continuationLabel), - ) - } - - break operatorSwitch - } - - frame := c.controlFrames.pop() - - // We need to reset the stack so that - // the values pushed inside the block. - dropOp := newOperationDrop(c.getFrameDropRange(frame, true)) - c.stackSwitchAt(frame) - - // Push the result types onto the stack. - for _, t := range frame.blockType.Results { - c.stackPush(wasmValueTypeTounsignedType(t)) - } - - // Emit the instructions according to the Kind of the current control frame. - switch frame.kind { - case controlFrameKindFunction: - if !c.controlFrames.empty() { - // Should never happen. If so, there's a bug in the translation. - panic("bug: found more function control frames") - } - // Return from function. - c.emit(dropOp) - c.emit(newOperationBr(newLabel(labelKindReturn, 0))) - case controlFrameKindIfWithoutElse: - // This case we have to emit "empty" else label. - elseLabel := newLabel(labelKindElse, frame.frameID) - continuationLabel := newLabel(labelKindContinuation, frame.frameID) - c.result.LabelCallers[continuationLabel] += 2 - c.emit(dropOp) - c.emit(newOperationBr(continuationLabel)) - // Emit the else which soon branches into the continuation. - c.emit(newOperationLabel(elseLabel)) - c.emit(newOperationBr(continuationLabel)) - // Initiate the continuation. - c.emit(newOperationLabel(continuationLabel)) - case controlFrameKindBlockWithContinuationLabel, - controlFrameKindIfWithElse: - continuationLabel := newLabel(labelKindContinuation, frame.frameID) - c.result.LabelCallers[continuationLabel]++ - c.emit(dropOp) - c.emit(newOperationBr(continuationLabel)) - c.emit(newOperationLabel(continuationLabel)) - case controlFrameKindLoop, controlFrameKindBlockWithoutContinuationLabel: - c.emit( - dropOp, - ) - default: - // Should never happen. If so, there's a bug in the translation. - panic(fmt.Errorf("bug: invalid control frame Kind: 0x%x", frame.kind)) - } - - case wasm.OpcodeBr: - targetIndex, n, err := leb128.LoadUint32(c.body[c.pc+1:]) - if err != nil { - return fmt.Errorf("read the target for br_if: %w", err) - } - c.pc += n - - if c.unreachableState.on { - // If it is currently in unreachable, br is no-op. - break operatorSwitch - } - - targetFrame := c.controlFrames.get(int(targetIndex)) - targetFrame.ensureContinuation() - dropOp := newOperationDrop(c.getFrameDropRange(targetFrame, false)) - targetID := targetFrame.asLabel() - c.result.LabelCallers[targetID]++ - c.emit(dropOp) - c.emit(newOperationBr(targetID)) - // Br operation is stack-polymorphic, and mark the state as unreachable. 
- // That means subsequent instructions in the current control frame are "unreachable" - // and can be safely removed. - c.markUnreachable() - case wasm.OpcodeBrIf: - targetIndex, n, err := leb128.LoadUint32(c.body[c.pc+1:]) - if err != nil { - return fmt.Errorf("read the target for br_if: %w", err) - } - c.pc += n - - if c.unreachableState.on { - // If it is currently in unreachable, br-if is no-op. - break operatorSwitch - } - - targetFrame := c.controlFrames.get(int(targetIndex)) - targetFrame.ensureContinuation() - drop := c.getFrameDropRange(targetFrame, false) - target := targetFrame.asLabel() - c.result.LabelCallers[target]++ - - continuationLabel := newLabel(labelKindHeader, c.nextFrameID()) - c.result.LabelCallers[continuationLabel]++ - c.emit(newOperationBrIf(target, continuationLabel, drop)) - // Start emitting else block operations. - c.emit(newOperationLabel(continuationLabel)) - case wasm.OpcodeBrTable: - c.br.Reset(c.body[c.pc+1:]) - r := c.br - numTargets, n, err := leb128.DecodeUint32(r) - if err != nil { - return fmt.Errorf("error reading number of targets in br_table: %w", err) - } - c.pc += n - - if c.unreachableState.on { - // If it is currently in unreachable, br_table is no-op. - // But before proceeding to the next instruction, we must advance the pc - // according to the number of br_table targets. - for i := uint32(0); i <= numTargets; i++ { // inclusive as we also need to read the index of default target. - _, n, err := leb128.DecodeUint32(r) - if err != nil { - return fmt.Errorf("error reading target %d in br_table: %w", i, err) - } - c.pc += n - } - break operatorSwitch - } - - // Read the branch targets. - s := numTargets * 2 - targetLabels := make([]uint64, 2+s) // (label, inclusiveRange) * (default+numTargets) - for i := uint32(0); i < s; i += 2 { - l, n, err := leb128.DecodeUint32(r) - if err != nil { - return fmt.Errorf("error reading target %d in br_table: %w", i, err) - } - c.pc += n - targetFrame := c.controlFrames.get(int(l)) - targetFrame.ensureContinuation() - drop := c.getFrameDropRange(targetFrame, false) - targetLabel := targetFrame.asLabel() - targetLabels[i] = uint64(targetLabel) - targetLabels[i+1] = drop.AsU64() - c.result.LabelCallers[targetLabel]++ - } - - // Prep default target control frame. - l, n, err := leb128.DecodeUint32(r) - if err != nil { - return fmt.Errorf("error reading default target of br_table: %w", err) - } - c.pc += n - defaultTargetFrame := c.controlFrames.get(int(l)) - defaultTargetFrame.ensureContinuation() - defaultTargetDrop := c.getFrameDropRange(defaultTargetFrame, false) - defaultLabel := defaultTargetFrame.asLabel() - c.result.LabelCallers[defaultLabel]++ - targetLabels[s] = uint64(defaultLabel) - targetLabels[s+1] = defaultTargetDrop.AsU64() - c.emit(newOperationBrTable(targetLabels)) - - // br_table operation is stack-polymorphic, and mark the state as unreachable. - // That means subsequent instructions in the current control frame are "unreachable" - // and can be safely removed. - c.markUnreachable() - case wasm.OpcodeReturn: - functionFrame := c.controlFrames.functionFrame() - dropOp := newOperationDrop(c.getFrameDropRange(functionFrame, false)) - - // Cleanup the stack and then jmp to function frame's continuation (meaning return). - c.emit(dropOp) - c.emit(newOperationBr(functionFrame.asLabel())) - - // Return operation is stack-polymorphic, and mark the state as unreachable. - // That means subsequent instructions in the current control frame are "unreachable" - // and can be safely removed. 
- c.markUnreachable() - case wasm.OpcodeCall: - c.emit( - newOperationCall(index), - ) - case wasm.OpcodeCallIndirect: - typeIndex := index - tableIndex, n, err := leb128.LoadUint32(c.body[c.pc+1:]) - if err != nil { - return fmt.Errorf("read target for br_table: %w", err) - } - c.pc += n - c.emit( - newOperationCallIndirect(typeIndex, tableIndex), - ) - case wasm.OpcodeDrop: - r := inclusiveRange{Start: 0, End: 0} - if peekValueType == unsignedTypeV128 { - // inclusiveRange is the range in uint64 representation, so dropping a vector value on top - // should be translated as drop [0..1] inclusively. - r.End++ - } - c.emit(newOperationDrop(r)) - case wasm.OpcodeSelect: - // If it is on the unreachable state, ignore the instruction. - if c.unreachableState.on { - break operatorSwitch - } - isTargetVector := c.stackPeek() == unsignedTypeV128 - c.emit( - newOperationSelect(isTargetVector), - ) - case wasm.OpcodeTypedSelect: - // Skips two bytes: vector size fixed to 1, and the value type for select. - c.pc += 2 - // If it is on the unreachable state, ignore the instruction. - if c.unreachableState.on { - break operatorSwitch - } - // Typed select is semantically equivalent to select at runtime. - isTargetVector := c.stackPeek() == unsignedTypeV128 - c.emit( - newOperationSelect(isTargetVector), - ) - case wasm.OpcodeLocalGet: - depth := c.localDepth(index) - if isVector := c.localType(index) == wasm.ValueTypeV128; !isVector { - c.emit( - // -1 because we already manipulated the stack before - // called localDepth ^^. - newOperationPick(depth-1, isVector), - ) - } else { - c.emit( - // -2 because we already manipulated the stack before - // called localDepth ^^. - newOperationPick(depth-2, isVector), - ) - } - case wasm.OpcodeLocalSet: - depth := c.localDepth(index) - - isVector := c.localType(index) == wasm.ValueTypeV128 - if isVector { - c.emit( - // +2 because we already popped the operands for this operation from the c.stack before - // called localDepth ^^, - newOperationSet(depth+2, isVector), - ) - } else { - c.emit( - // +1 because we already popped the operands for this operation from the c.stack before - // called localDepth ^^, - newOperationSet(depth+1, isVector), - ) - } - case wasm.OpcodeLocalTee: - depth := c.localDepth(index) - isVector := c.localType(index) == wasm.ValueTypeV128 - if isVector { - c.emit(newOperationPick(1, isVector)) - c.emit(newOperationSet(depth+2, isVector)) - } else { - c.emit( - newOperationPick(0, isVector)) - c.emit(newOperationSet(depth+1, isVector)) - } - case wasm.OpcodeGlobalGet: - c.emit( - newOperationGlobalGet(index), - ) - case wasm.OpcodeGlobalSet: - c.emit( - newOperationGlobalSet(index), - ) - case wasm.OpcodeI32Load: - imm, err := c.readMemoryArg(wasm.OpcodeI32LoadName) - if err != nil { - return err - } - c.emit(newOperationLoad(unsignedTypeI32, imm)) - case wasm.OpcodeI64Load: - imm, err := c.readMemoryArg(wasm.OpcodeI64LoadName) - if err != nil { - return err - } - c.emit(newOperationLoad(unsignedTypeI64, imm)) - case wasm.OpcodeF32Load: - imm, err := c.readMemoryArg(wasm.OpcodeF32LoadName) - if err != nil { - return err - } - c.emit(newOperationLoad(unsignedTypeF32, imm)) - case wasm.OpcodeF64Load: - imm, err := c.readMemoryArg(wasm.OpcodeF64LoadName) - if err != nil { - return err - } - c.emit(newOperationLoad(unsignedTypeF64, imm)) - case wasm.OpcodeI32Load8S: - imm, err := c.readMemoryArg(wasm.OpcodeI32Load8SName) - if err != nil { - return err - } - c.emit(newOperationLoad8(signedInt32, imm)) - case wasm.OpcodeI32Load8U: - imm, err := 
c.readMemoryArg(wasm.OpcodeI32Load8UName) - if err != nil { - return err - } - c.emit(newOperationLoad8(signedUint32, imm)) - case wasm.OpcodeI32Load16S: - imm, err := c.readMemoryArg(wasm.OpcodeI32Load16SName) - if err != nil { - return err - } - c.emit(newOperationLoad16(signedInt32, imm)) - case wasm.OpcodeI32Load16U: - imm, err := c.readMemoryArg(wasm.OpcodeI32Load16UName) - if err != nil { - return err - } - c.emit(newOperationLoad16(signedUint32, imm)) - case wasm.OpcodeI64Load8S: - imm, err := c.readMemoryArg(wasm.OpcodeI64Load8SName) - if err != nil { - return err - } - c.emit(newOperationLoad8(signedInt64, imm)) - case wasm.OpcodeI64Load8U: - imm, err := c.readMemoryArg(wasm.OpcodeI64Load8UName) - if err != nil { - return err - } - c.emit(newOperationLoad8(signedUint64, imm)) - case wasm.OpcodeI64Load16S: - imm, err := c.readMemoryArg(wasm.OpcodeI64Load16SName) - if err != nil { - return err - } - c.emit(newOperationLoad16(signedInt64, imm)) - case wasm.OpcodeI64Load16U: - imm, err := c.readMemoryArg(wasm.OpcodeI64Load16UName) - if err != nil { - return err - } - c.emit(newOperationLoad16(signedUint64, imm)) - case wasm.OpcodeI64Load32S: - imm, err := c.readMemoryArg(wasm.OpcodeI64Load32SName) - if err != nil { - return err - } - c.emit(newOperationLoad32(true, imm)) - case wasm.OpcodeI64Load32U: - imm, err := c.readMemoryArg(wasm.OpcodeI64Load32UName) - if err != nil { - return err - } - c.emit(newOperationLoad32(false, imm)) - case wasm.OpcodeI32Store: - imm, err := c.readMemoryArg(wasm.OpcodeI32StoreName) - if err != nil { - return err - } - c.emit( - newOperationStore(unsignedTypeI32, imm), - ) - case wasm.OpcodeI64Store: - imm, err := c.readMemoryArg(wasm.OpcodeI64StoreName) - if err != nil { - return err - } - c.emit( - newOperationStore(unsignedTypeI64, imm), - ) - case wasm.OpcodeF32Store: - imm, err := c.readMemoryArg(wasm.OpcodeF32StoreName) - if err != nil { - return err - } - c.emit( - newOperationStore(unsignedTypeF32, imm), - ) - case wasm.OpcodeF64Store: - imm, err := c.readMemoryArg(wasm.OpcodeF64StoreName) - if err != nil { - return err - } - c.emit( - newOperationStore(unsignedTypeF64, imm), - ) - case wasm.OpcodeI32Store8: - imm, err := c.readMemoryArg(wasm.OpcodeI32Store8Name) - if err != nil { - return err - } - c.emit( - newOperationStore8(imm), - ) - case wasm.OpcodeI32Store16: - imm, err := c.readMemoryArg(wasm.OpcodeI32Store16Name) - if err != nil { - return err - } - c.emit( - newOperationStore16(imm), - ) - case wasm.OpcodeI64Store8: - imm, err := c.readMemoryArg(wasm.OpcodeI64Store8Name) - if err != nil { - return err - } - c.emit( - newOperationStore8(imm), - ) - case wasm.OpcodeI64Store16: - imm, err := c.readMemoryArg(wasm.OpcodeI64Store16Name) - if err != nil { - return err - } - c.emit( - newOperationStore16(imm), - ) - case wasm.OpcodeI64Store32: - imm, err := c.readMemoryArg(wasm.OpcodeI64Store32Name) - if err != nil { - return err - } - c.emit( - newOperationStore32(imm), - ) - case wasm.OpcodeMemorySize: - c.result.UsesMemory = true - c.pc++ // Skip the reserved one byte. - c.emit( - newOperationMemorySize(), - ) - case wasm.OpcodeMemoryGrow: - c.result.UsesMemory = true - c.pc++ // Skip the reserved one byte. 
- c.emit( - newOperationMemoryGrow(), - ) - case wasm.OpcodeI32Const: - val, num, err := leb128.LoadInt32(c.body[c.pc+1:]) - if err != nil { - return fmt.Errorf("reading i32.const value: %v", err) - } - c.pc += num - c.emit( - newOperationConstI32(uint32(val)), - ) - case wasm.OpcodeI64Const: - val, num, err := leb128.LoadInt64(c.body[c.pc+1:]) - if err != nil { - return fmt.Errorf("reading i64.const value: %v", err) - } - c.pc += num - c.emit( - newOperationConstI64(uint64(val)), - ) - case wasm.OpcodeF32Const: - v := math.Float32frombits(binary.LittleEndian.Uint32(c.body[c.pc+1:])) - c.pc += 4 - c.emit( - newOperationConstF32(v), - ) - case wasm.OpcodeF64Const: - v := math.Float64frombits(binary.LittleEndian.Uint64(c.body[c.pc+1:])) - c.pc += 8 - c.emit( - newOperationConstF64(v), - ) - case wasm.OpcodeI32Eqz: - c.emit( - newOperationEqz(unsignedInt32), - ) - case wasm.OpcodeI32Eq: - c.emit( - newOperationEq(unsignedTypeI32), - ) - case wasm.OpcodeI32Ne: - c.emit( - newOperationNe(unsignedTypeI32), - ) - case wasm.OpcodeI32LtS: - c.emit( - newOperationLt(signedTypeInt32), - ) - case wasm.OpcodeI32LtU: - c.emit( - newOperationLt(signedTypeUint32), - ) - case wasm.OpcodeI32GtS: - c.emit( - newOperationGt(signedTypeInt32), - ) - case wasm.OpcodeI32GtU: - c.emit( - newOperationGt(signedTypeUint32), - ) - case wasm.OpcodeI32LeS: - c.emit( - newOperationLe(signedTypeInt32), - ) - case wasm.OpcodeI32LeU: - c.emit( - newOperationLe(signedTypeUint32), - ) - case wasm.OpcodeI32GeS: - c.emit( - newOperationGe(signedTypeInt32), - ) - case wasm.OpcodeI32GeU: - c.emit( - newOperationGe(signedTypeUint32), - ) - case wasm.OpcodeI64Eqz: - c.emit( - newOperationEqz(unsignedInt64), - ) - case wasm.OpcodeI64Eq: - c.emit( - newOperationEq(unsignedTypeI64), - ) - case wasm.OpcodeI64Ne: - c.emit( - newOperationNe(unsignedTypeI64), - ) - case wasm.OpcodeI64LtS: - c.emit( - newOperationLt(signedTypeInt64), - ) - case wasm.OpcodeI64LtU: - c.emit( - newOperationLt(signedTypeUint64), - ) - case wasm.OpcodeI64GtS: - c.emit( - newOperationGt(signedTypeInt64), - ) - case wasm.OpcodeI64GtU: - c.emit( - newOperationGt(signedTypeUint64), - ) - case wasm.OpcodeI64LeS: - c.emit( - newOperationLe(signedTypeInt64), - ) - case wasm.OpcodeI64LeU: - c.emit( - newOperationLe(signedTypeUint64), - ) - case wasm.OpcodeI64GeS: - c.emit( - newOperationGe(signedTypeInt64), - ) - case wasm.OpcodeI64GeU: - c.emit( - newOperationGe(signedTypeUint64), - ) - case wasm.OpcodeF32Eq: - c.emit( - newOperationEq(unsignedTypeF32), - ) - case wasm.OpcodeF32Ne: - c.emit( - newOperationNe(unsignedTypeF32), - ) - case wasm.OpcodeF32Lt: - c.emit( - newOperationLt(signedTypeFloat32), - ) - case wasm.OpcodeF32Gt: - c.emit( - newOperationGt(signedTypeFloat32), - ) - case wasm.OpcodeF32Le: - c.emit( - newOperationLe(signedTypeFloat32), - ) - case wasm.OpcodeF32Ge: - c.emit( - newOperationGe(signedTypeFloat32), - ) - case wasm.OpcodeF64Eq: - c.emit( - newOperationEq(unsignedTypeF64), - ) - case wasm.OpcodeF64Ne: - c.emit( - newOperationNe(unsignedTypeF64), - ) - case wasm.OpcodeF64Lt: - c.emit( - newOperationLt(signedTypeFloat64), - ) - case wasm.OpcodeF64Gt: - c.emit( - newOperationGt(signedTypeFloat64), - ) - case wasm.OpcodeF64Le: - c.emit( - newOperationLe(signedTypeFloat64), - ) - case wasm.OpcodeF64Ge: - c.emit( - newOperationGe(signedTypeFloat64), - ) - case wasm.OpcodeI32Clz: - c.emit( - newOperationClz(unsignedInt32), - ) - case wasm.OpcodeI32Ctz: - c.emit( - newOperationCtz(unsignedInt32), - ) - case wasm.OpcodeI32Popcnt: - c.emit( - 
newOperationPopcnt(unsignedInt32), - ) - case wasm.OpcodeI32Add: - c.emit( - newOperationAdd(unsignedTypeI32), - ) - case wasm.OpcodeI32Sub: - c.emit( - newOperationSub(unsignedTypeI32), - ) - case wasm.OpcodeI32Mul: - c.emit( - newOperationMul(unsignedTypeI32), - ) - case wasm.OpcodeI32DivS: - c.emit( - newOperationDiv(signedTypeInt32), - ) - case wasm.OpcodeI32DivU: - c.emit( - newOperationDiv(signedTypeUint32), - ) - case wasm.OpcodeI32RemS: - c.emit( - newOperationRem(signedInt32), - ) - case wasm.OpcodeI32RemU: - c.emit( - newOperationRem(signedUint32), - ) - case wasm.OpcodeI32And: - c.emit( - newOperationAnd(unsignedInt32), - ) - case wasm.OpcodeI32Or: - c.emit( - newOperationOr(unsignedInt32), - ) - case wasm.OpcodeI32Xor: - c.emit( - newOperationXor(unsignedInt64), - ) - case wasm.OpcodeI32Shl: - c.emit( - newOperationShl(unsignedInt32), - ) - case wasm.OpcodeI32ShrS: - c.emit( - newOperationShr(signedInt32), - ) - case wasm.OpcodeI32ShrU: - c.emit( - newOperationShr(signedUint32), - ) - case wasm.OpcodeI32Rotl: - c.emit( - newOperationRotl(unsignedInt32), - ) - case wasm.OpcodeI32Rotr: - c.emit( - newOperationRotr(unsignedInt32), - ) - case wasm.OpcodeI64Clz: - c.emit( - newOperationClz(unsignedInt64), - ) - case wasm.OpcodeI64Ctz: - c.emit( - newOperationCtz(unsignedInt64), - ) - case wasm.OpcodeI64Popcnt: - c.emit( - newOperationPopcnt(unsignedInt64), - ) - case wasm.OpcodeI64Add: - c.emit( - newOperationAdd(unsignedTypeI64), - ) - case wasm.OpcodeI64Sub: - c.emit( - newOperationSub(unsignedTypeI64), - ) - case wasm.OpcodeI64Mul: - c.emit( - newOperationMul(unsignedTypeI64), - ) - case wasm.OpcodeI64DivS: - c.emit( - newOperationDiv(signedTypeInt64), - ) - case wasm.OpcodeI64DivU: - c.emit( - newOperationDiv(signedTypeUint64), - ) - case wasm.OpcodeI64RemS: - c.emit( - newOperationRem(signedInt64), - ) - case wasm.OpcodeI64RemU: - c.emit( - newOperationRem(signedUint64), - ) - case wasm.OpcodeI64And: - c.emit( - newOperationAnd(unsignedInt64), - ) - case wasm.OpcodeI64Or: - c.emit( - newOperationOr(unsignedInt64), - ) - case wasm.OpcodeI64Xor: - c.emit( - newOperationXor(unsignedInt64), - ) - case wasm.OpcodeI64Shl: - c.emit( - newOperationShl(unsignedInt64), - ) - case wasm.OpcodeI64ShrS: - c.emit( - newOperationShr(signedInt64), - ) - case wasm.OpcodeI64ShrU: - c.emit( - newOperationShr(signedUint64), - ) - case wasm.OpcodeI64Rotl: - c.emit( - newOperationRotl(unsignedInt64), - ) - case wasm.OpcodeI64Rotr: - c.emit( - newOperationRotr(unsignedInt64), - ) - case wasm.OpcodeF32Abs: - c.emit( - newOperationAbs(f32), - ) - case wasm.OpcodeF32Neg: - c.emit( - newOperationNeg(f32), - ) - case wasm.OpcodeF32Ceil: - c.emit( - newOperationCeil(f32), - ) - case wasm.OpcodeF32Floor: - c.emit( - newOperationFloor(f32), - ) - case wasm.OpcodeF32Trunc: - c.emit( - newOperationTrunc(f32), - ) - case wasm.OpcodeF32Nearest: - c.emit( - newOperationNearest(f32), - ) - case wasm.OpcodeF32Sqrt: - c.emit( - newOperationSqrt(f32), - ) - case wasm.OpcodeF32Add: - c.emit( - newOperationAdd(unsignedTypeF32), - ) - case wasm.OpcodeF32Sub: - c.emit( - newOperationSub(unsignedTypeF32), - ) - case wasm.OpcodeF32Mul: - c.emit( - newOperationMul(unsignedTypeF32), - ) - case wasm.OpcodeF32Div: - c.emit( - newOperationDiv(signedTypeFloat32), - ) - case wasm.OpcodeF32Min: - c.emit( - newOperationMin(f32), - ) - case wasm.OpcodeF32Max: - c.emit( - newOperationMax(f32), - ) - case wasm.OpcodeF32Copysign: - c.emit( - newOperationCopysign(f32), - ) - case wasm.OpcodeF64Abs: - c.emit( - newOperationAbs(f64), - ) - 
case wasm.OpcodeF64Neg: - c.emit( - newOperationNeg(f64), - ) - case wasm.OpcodeF64Ceil: - c.emit( - newOperationCeil(f64), - ) - case wasm.OpcodeF64Floor: - c.emit( - newOperationFloor(f64), - ) - case wasm.OpcodeF64Trunc: - c.emit( - newOperationTrunc(f64), - ) - case wasm.OpcodeF64Nearest: - c.emit( - newOperationNearest(f64), - ) - case wasm.OpcodeF64Sqrt: - c.emit( - newOperationSqrt(f64), - ) - case wasm.OpcodeF64Add: - c.emit( - newOperationAdd(unsignedTypeF64), - ) - case wasm.OpcodeF64Sub: - c.emit( - newOperationSub(unsignedTypeF64), - ) - case wasm.OpcodeF64Mul: - c.emit( - newOperationMul(unsignedTypeF64), - ) - case wasm.OpcodeF64Div: - c.emit( - newOperationDiv(signedTypeFloat64), - ) - case wasm.OpcodeF64Min: - c.emit( - newOperationMin(f64), - ) - case wasm.OpcodeF64Max: - c.emit( - newOperationMax(f64), - ) - case wasm.OpcodeF64Copysign: - c.emit( - newOperationCopysign(f64), - ) - case wasm.OpcodeI32WrapI64: - c.emit( - newOperationI32WrapFromI64(), - ) - case wasm.OpcodeI32TruncF32S: - c.emit( - newOperationITruncFromF(f32, signedInt32, false), - ) - case wasm.OpcodeI32TruncF32U: - c.emit( - newOperationITruncFromF(f32, signedUint32, false), - ) - case wasm.OpcodeI32TruncF64S: - c.emit( - newOperationITruncFromF(f64, signedInt32, false), - ) - case wasm.OpcodeI32TruncF64U: - c.emit( - newOperationITruncFromF(f64, signedUint32, false), - ) - case wasm.OpcodeI64ExtendI32S: - c.emit( - newOperationExtend(true), - ) - case wasm.OpcodeI64ExtendI32U: - c.emit( - newOperationExtend(false), - ) - case wasm.OpcodeI64TruncF32S: - c.emit( - newOperationITruncFromF(f32, signedInt64, false), - ) - case wasm.OpcodeI64TruncF32U: - c.emit( - newOperationITruncFromF(f32, signedUint64, false), - ) - case wasm.OpcodeI64TruncF64S: - c.emit( - newOperationITruncFromF(f64, signedInt64, false), - ) - case wasm.OpcodeI64TruncF64U: - c.emit( - newOperationITruncFromF(f64, signedUint64, false), - ) - case wasm.OpcodeF32ConvertI32S: - c.emit( - newOperationFConvertFromI(signedInt32, f32), - ) - case wasm.OpcodeF32ConvertI32U: - c.emit( - newOperationFConvertFromI(signedUint32, f32), - ) - case wasm.OpcodeF32ConvertI64S: - c.emit( - newOperationFConvertFromI(signedInt64, f32), - ) - case wasm.OpcodeF32ConvertI64U: - c.emit( - newOperationFConvertFromI(signedUint64, f32), - ) - case wasm.OpcodeF32DemoteF64: - c.emit( - newOperationF32DemoteFromF64(), - ) - case wasm.OpcodeF64ConvertI32S: - c.emit( - newOperationFConvertFromI(signedInt32, f64), - ) - case wasm.OpcodeF64ConvertI32U: - c.emit( - newOperationFConvertFromI(signedUint32, f64), - ) - case wasm.OpcodeF64ConvertI64S: - c.emit( - newOperationFConvertFromI(signedInt64, f64), - ) - case wasm.OpcodeF64ConvertI64U: - c.emit( - newOperationFConvertFromI(signedUint64, f64), - ) - case wasm.OpcodeF64PromoteF32: - c.emit( - newOperationF64PromoteFromF32(), - ) - case wasm.OpcodeI32ReinterpretF32: - c.emit( - newOperationI32ReinterpretFromF32(), - ) - case wasm.OpcodeI64ReinterpretF64: - c.emit( - newOperationI64ReinterpretFromF64(), - ) - case wasm.OpcodeF32ReinterpretI32: - c.emit( - newOperationF32ReinterpretFromI32(), - ) - case wasm.OpcodeF64ReinterpretI64: - c.emit( - newOperationF64ReinterpretFromI64(), - ) - case wasm.OpcodeI32Extend8S: - c.emit( - newOperationSignExtend32From8(), - ) - case wasm.OpcodeI32Extend16S: - c.emit( - newOperationSignExtend32From16(), - ) - case wasm.OpcodeI64Extend8S: - c.emit( - newOperationSignExtend64From8(), - ) - case wasm.OpcodeI64Extend16S: - c.emit( - newOperationSignExtend64From16(), - ) - case 
wasm.OpcodeI64Extend32S: - c.emit( - newOperationSignExtend64From32(), - ) - case wasm.OpcodeRefFunc: - c.pc++ - index, num, err := leb128.LoadUint32(c.body[c.pc:]) - if err != nil { - return fmt.Errorf("failed to read function index for ref.func: %v", err) - } - c.pc += num - 1 - c.emit( - newOperationRefFunc(index), - ) - case wasm.OpcodeRefNull: - c.pc++ // Skip the type of reftype as every ref value is opaque pointer. - c.emit( - newOperationConstI64(0), - ) - case wasm.OpcodeRefIsNull: - // Simply compare the opaque pointer (i64) with zero. - c.emit( - newOperationEqz(unsignedInt64), - ) - case wasm.OpcodeTableGet: - c.pc++ - tableIndex, num, err := leb128.LoadUint32(c.body[c.pc:]) - if err != nil { - return fmt.Errorf("failed to read function index for table.get: %v", err) - } - c.pc += num - 1 - c.emit( - newOperationTableGet(tableIndex), - ) - case wasm.OpcodeTableSet: - c.pc++ - tableIndex, num, err := leb128.LoadUint32(c.body[c.pc:]) - if err != nil { - return fmt.Errorf("failed to read function index for table.set: %v", err) - } - c.pc += num - 1 - c.emit( - newOperationTableSet(tableIndex), - ) - case wasm.OpcodeMiscPrefix: - c.pc++ - // A misc opcode is encoded as an unsigned variable 32-bit integer. - miscOp, num, err := leb128.LoadUint32(c.body[c.pc:]) - if err != nil { - return fmt.Errorf("failed to read misc opcode: %v", err) - } - c.pc += num - 1 - switch byte(miscOp) { - case wasm.OpcodeMiscI32TruncSatF32S: - c.emit( - newOperationITruncFromF(f32, signedInt32, true), - ) - case wasm.OpcodeMiscI32TruncSatF32U: - c.emit( - newOperationITruncFromF(f32, signedUint32, true), - ) - case wasm.OpcodeMiscI32TruncSatF64S: - c.emit( - newOperationITruncFromF(f64, signedInt32, true), - ) - case wasm.OpcodeMiscI32TruncSatF64U: - c.emit( - newOperationITruncFromF(f64, signedUint32, true), - ) - case wasm.OpcodeMiscI64TruncSatF32S: - c.emit( - newOperationITruncFromF(f32, signedInt64, true), - ) - case wasm.OpcodeMiscI64TruncSatF32U: - c.emit( - newOperationITruncFromF(f32, signedUint64, true), - ) - case wasm.OpcodeMiscI64TruncSatF64S: - c.emit( - newOperationITruncFromF(f64, signedInt64, true), - ) - case wasm.OpcodeMiscI64TruncSatF64U: - c.emit( - newOperationITruncFromF(f64, signedUint64, true), - ) - case wasm.OpcodeMiscMemoryInit: - c.result.UsesMemory = true - dataIndex, num, err := leb128.LoadUint32(c.body[c.pc+1:]) - if err != nil { - return fmt.Errorf("reading i32.const value: %v", err) - } - c.pc += num + 1 // +1 to skip the memory index which is fixed to zero. - c.emit( - newOperationMemoryInit(dataIndex), - ) - case wasm.OpcodeMiscDataDrop: - dataIndex, num, err := leb128.LoadUint32(c.body[c.pc+1:]) - if err != nil { - return fmt.Errorf("reading i32.const value: %v", err) - } - c.pc += num - c.emit( - newOperationDataDrop(dataIndex), - ) - case wasm.OpcodeMiscMemoryCopy: - c.result.UsesMemory = true - c.pc += 2 // +2 to skip two memory indexes which are fixed to zero. - c.emit( - newOperationMemoryCopy(), - ) - case wasm.OpcodeMiscMemoryFill: - c.result.UsesMemory = true - c.pc += 1 // +1 to skip the memory index which is fixed to zero. - c.emit( - newOperationMemoryFill(), - ) - case wasm.OpcodeMiscTableInit: - elemIndex, num, err := leb128.LoadUint32(c.body[c.pc+1:]) - if err != nil { - return fmt.Errorf("reading i32.const value: %v", err) - } - c.pc += num - // Read table index which is fixed to zero currently. 
- tableIndex, num, err := leb128.LoadUint32(c.body[c.pc+1:]) - if err != nil { - return fmt.Errorf("reading i32.const value: %v", err) - } - c.pc += num - c.emit( - newOperationTableInit(elemIndex, tableIndex), - ) - case wasm.OpcodeMiscElemDrop: - elemIndex, num, err := leb128.LoadUint32(c.body[c.pc+1:]) - if err != nil { - return fmt.Errorf("reading i32.const value: %v", err) - } - c.pc += num - c.emit( - newOperationElemDrop(elemIndex), - ) - case wasm.OpcodeMiscTableCopy: - // Read the source table inde.g. - dst, num, err := leb128.LoadUint32(c.body[c.pc+1:]) - if err != nil { - return fmt.Errorf("reading i32.const value: %v", err) - } - c.pc += num - // Read the destination table inde.g. - src, num, err := leb128.LoadUint32(c.body[c.pc+1:]) - if err != nil { - return fmt.Errorf("reading i32.const value: %v", err) - } - c.pc += num - c.emit( - newOperationTableCopy(src, dst), - ) - case wasm.OpcodeMiscTableGrow: - // Read the source table inde.g. - tableIndex, num, err := leb128.LoadUint32(c.body[c.pc+1:]) - if err != nil { - return fmt.Errorf("reading i32.const value: %v", err) - } - c.pc += num - c.emit( - newOperationTableGrow(tableIndex), - ) - case wasm.OpcodeMiscTableSize: - // Read the source table inde.g. - tableIndex, num, err := leb128.LoadUint32(c.body[c.pc+1:]) - if err != nil { - return fmt.Errorf("reading i32.const value: %v", err) - } - c.pc += num - c.emit( - newOperationTableSize(tableIndex), - ) - case wasm.OpcodeMiscTableFill: - // Read the source table index. - tableIndex, num, err := leb128.LoadUint32(c.body[c.pc+1:]) - if err != nil { - return fmt.Errorf("reading i32.const value: %v", err) - } - c.pc += num - c.emit( - newOperationTableFill(tableIndex), - ) - default: - return fmt.Errorf("unsupported misc instruction in interpreterir: 0x%x", op) - } - case wasm.OpcodeVecPrefix: - c.pc++ - switch vecOp := c.body[c.pc]; vecOp { - case wasm.OpcodeVecV128Const: - c.pc++ - lo := binary.LittleEndian.Uint64(c.body[c.pc : c.pc+8]) - c.pc += 8 - hi := binary.LittleEndian.Uint64(c.body[c.pc : c.pc+8]) - c.emit( - newOperationV128Const(lo, hi), - ) - c.pc += 7 - case wasm.OpcodeVecV128Load: - arg, err := c.readMemoryArg(wasm.OpcodeI32LoadName) - if err != nil { - return err - } - c.emit( - newOperationV128Load(v128LoadType128, arg), - ) - case wasm.OpcodeVecV128Load8x8s: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load8x8SName) - if err != nil { - return err - } - c.emit( - newOperationV128Load(v128LoadType8x8s, arg), - ) - case wasm.OpcodeVecV128Load8x8u: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load8x8UName) - if err != nil { - return err - } - c.emit( - newOperationV128Load(v128LoadType8x8u, arg), - ) - case wasm.OpcodeVecV128Load16x4s: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load16x4SName) - if err != nil { - return err - } - c.emit( - newOperationV128Load(v128LoadType16x4s, arg), - ) - case wasm.OpcodeVecV128Load16x4u: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load16x4UName) - if err != nil { - return err - } - c.emit( - newOperationV128Load(v128LoadType16x4u, arg), - ) - case wasm.OpcodeVecV128Load32x2s: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load32x2SName) - if err != nil { - return err - } - c.emit( - newOperationV128Load(v128LoadType32x2s, arg), - ) - case wasm.OpcodeVecV128Load32x2u: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load32x2UName) - if err != nil { - return err - } - c.emit( - newOperationV128Load(v128LoadType32x2u, arg), - ) - case wasm.OpcodeVecV128Load8Splat: - arg, err := 
c.readMemoryArg(wasm.OpcodeVecV128Load8SplatName) - if err != nil { - return err - } - c.emit( - newOperationV128Load(v128LoadType8Splat, arg), - ) - case wasm.OpcodeVecV128Load16Splat: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load16SplatName) - if err != nil { - return err - } - c.emit( - newOperationV128Load(v128LoadType16Splat, arg), - ) - case wasm.OpcodeVecV128Load32Splat: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load32SplatName) - if err != nil { - return err - } - c.emit( - newOperationV128Load(v128LoadType32Splat, arg), - ) - case wasm.OpcodeVecV128Load64Splat: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load64SplatName) - if err != nil { - return err - } - c.emit( - newOperationV128Load(v128LoadType64Splat, arg), - ) - case wasm.OpcodeVecV128Load32zero: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load32zeroName) - if err != nil { - return err - } - c.emit( - newOperationV128Load(v128LoadType32zero, arg), - ) - case wasm.OpcodeVecV128Load64zero: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load64zeroName) - if err != nil { - return err - } - c.emit( - newOperationV128Load(v128LoadType64zero, arg), - ) - case wasm.OpcodeVecV128Load8Lane: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load8LaneName) - if err != nil { - return err - } - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128LoadLane(laneIndex, 8, arg), - ) - case wasm.OpcodeVecV128Load16Lane: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load16LaneName) - if err != nil { - return err - } - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128LoadLane(laneIndex, 16, arg), - ) - case wasm.OpcodeVecV128Load32Lane: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load32LaneName) - if err != nil { - return err - } - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128LoadLane(laneIndex, 32, arg), - ) - case wasm.OpcodeVecV128Load64Lane: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Load64LaneName) - if err != nil { - return err - } - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128LoadLane(laneIndex, 64, arg), - ) - case wasm.OpcodeVecV128Store: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128StoreName) - if err != nil { - return err - } - c.emit( - newOperationV128Store(arg), - ) - case wasm.OpcodeVecV128Store8Lane: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Store8LaneName) - if err != nil { - return err - } - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128StoreLane(laneIndex, 8, arg), - ) - case wasm.OpcodeVecV128Store16Lane: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Store16LaneName) - if err != nil { - return err - } - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128StoreLane(laneIndex, 16, arg), - ) - case wasm.OpcodeVecV128Store32Lane: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Store32LaneName) - if err != nil { - return err - } - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128StoreLane(laneIndex, 32, arg), - ) - case wasm.OpcodeVecV128Store64Lane: - arg, err := c.readMemoryArg(wasm.OpcodeVecV128Store64LaneName) - if err != nil { - return err - } - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128StoreLane(laneIndex, 64, arg), - ) - case wasm.OpcodeVecI8x16ExtractLaneS: - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128ExtractLane(laneIndex, true, shapeI8x16), - ) - case wasm.OpcodeVecI8x16ExtractLaneU: - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128ExtractLane(laneIndex, false, shapeI8x16), - ) - case wasm.OpcodeVecI16x8ExtractLaneS: - 
c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128ExtractLane(laneIndex, true, shapeI16x8), - ) - case wasm.OpcodeVecI16x8ExtractLaneU: - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128ExtractLane(laneIndex, false, shapeI16x8), - ) - case wasm.OpcodeVecI32x4ExtractLane: - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128ExtractLane(laneIndex, false, shapeI32x4), - ) - case wasm.OpcodeVecI64x2ExtractLane: - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128ExtractLane(laneIndex, false, shapeI64x2), - ) - case wasm.OpcodeVecF32x4ExtractLane: - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128ExtractLane(laneIndex, false, shapeF32x4), - ) - case wasm.OpcodeVecF64x2ExtractLane: - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128ExtractLane(laneIndex, false, shapeF64x2), - ) - case wasm.OpcodeVecI8x16ReplaceLane: - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128ReplaceLane(laneIndex, shapeI8x16), - ) - case wasm.OpcodeVecI16x8ReplaceLane: - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128ReplaceLane(laneIndex, shapeI16x8), - ) - case wasm.OpcodeVecI32x4ReplaceLane: - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128ReplaceLane(laneIndex, shapeI32x4), - ) - case wasm.OpcodeVecI64x2ReplaceLane: - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128ReplaceLane(laneIndex, shapeI64x2), - ) - case wasm.OpcodeVecF32x4ReplaceLane: - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128ReplaceLane(laneIndex, shapeF32x4), - ) - case wasm.OpcodeVecF64x2ReplaceLane: - c.pc++ - laneIndex := c.body[c.pc] - c.emit( - newOperationV128ReplaceLane(laneIndex, shapeF64x2), - ) - case wasm.OpcodeVecI8x16Splat: - c.emit( - newOperationV128Splat(shapeI8x16), - ) - case wasm.OpcodeVecI16x8Splat: - c.emit( - newOperationV128Splat(shapeI16x8), - ) - case wasm.OpcodeVecI32x4Splat: - c.emit( - newOperationV128Splat(shapeI32x4), - ) - case wasm.OpcodeVecI64x2Splat: - c.emit( - newOperationV128Splat(shapeI64x2), - ) - case wasm.OpcodeVecF32x4Splat: - c.emit( - newOperationV128Splat(shapeF32x4), - ) - case wasm.OpcodeVecF64x2Splat: - c.emit( - newOperationV128Splat(shapeF64x2), - ) - case wasm.OpcodeVecI8x16Swizzle: - c.emit( - newOperationV128Swizzle(), - ) - case wasm.OpcodeVecV128i8x16Shuffle: - c.pc++ - lanes := make([]uint64, 16) - for i := uint64(0); i < 16; i++ { - lanes[i] = uint64(c.body[c.pc+i]) - } - op := newOperationV128Shuffle(lanes) - c.emit(op) - c.pc += 15 - case wasm.OpcodeVecV128AnyTrue: - c.emit( - newOperationV128AnyTrue(), - ) - case wasm.OpcodeVecI8x16AllTrue: - c.emit( - newOperationV128AllTrue(shapeI8x16), - ) - case wasm.OpcodeVecI16x8AllTrue: - c.emit( - newOperationV128AllTrue(shapeI16x8), - ) - case wasm.OpcodeVecI32x4AllTrue: - c.emit( - newOperationV128AllTrue(shapeI32x4), - ) - case wasm.OpcodeVecI64x2AllTrue: - c.emit( - newOperationV128AllTrue(shapeI64x2), - ) - case wasm.OpcodeVecI8x16BitMask: - c.emit( - newOperationV128BitMask(shapeI8x16), - ) - case wasm.OpcodeVecI16x8BitMask: - c.emit( - newOperationV128BitMask(shapeI16x8), - ) - case wasm.OpcodeVecI32x4BitMask: - c.emit( - newOperationV128BitMask(shapeI32x4), - ) - case wasm.OpcodeVecI64x2BitMask: - c.emit( - newOperationV128BitMask(shapeI64x2), - ) - case wasm.OpcodeVecV128And: - c.emit( - newOperationV128And(), - ) - case wasm.OpcodeVecV128Not: - c.emit( - newOperationV128Not(), - ) - case wasm.OpcodeVecV128Or: - c.emit( - newOperationV128Or(), - ) - case wasm.OpcodeVecV128Xor: - 
c.emit( - newOperationV128Xor(), - ) - case wasm.OpcodeVecV128Bitselect: - c.emit( - newOperationV128Bitselect(), - ) - case wasm.OpcodeVecV128AndNot: - c.emit( - newOperationV128AndNot(), - ) - case wasm.OpcodeVecI8x16Shl: - c.emit( - newOperationV128Shl(shapeI8x16), - ) - case wasm.OpcodeVecI8x16ShrS: - c.emit( - newOperationV128Shr(shapeI8x16, true), - ) - case wasm.OpcodeVecI8x16ShrU: - c.emit( - newOperationV128Shr(shapeI8x16, false), - ) - case wasm.OpcodeVecI16x8Shl: - c.emit( - newOperationV128Shl(shapeI16x8), - ) - case wasm.OpcodeVecI16x8ShrS: - c.emit( - newOperationV128Shr(shapeI16x8, true), - ) - case wasm.OpcodeVecI16x8ShrU: - c.emit( - newOperationV128Shr(shapeI16x8, false), - ) - case wasm.OpcodeVecI32x4Shl: - c.emit( - newOperationV128Shl(shapeI32x4), - ) - case wasm.OpcodeVecI32x4ShrS: - c.emit( - newOperationV128Shr(shapeI32x4, true), - ) - case wasm.OpcodeVecI32x4ShrU: - c.emit( - newOperationV128Shr(shapeI32x4, false), - ) - case wasm.OpcodeVecI64x2Shl: - c.emit( - newOperationV128Shl(shapeI64x2), - ) - case wasm.OpcodeVecI64x2ShrS: - c.emit( - newOperationV128Shr(shapeI64x2, true), - ) - case wasm.OpcodeVecI64x2ShrU: - c.emit( - newOperationV128Shr(shapeI64x2, false), - ) - case wasm.OpcodeVecI8x16Eq: - c.emit( - newOperationV128Cmp(v128CmpTypeI8x16Eq), - ) - case wasm.OpcodeVecI8x16Ne: - c.emit( - newOperationV128Cmp(v128CmpTypeI8x16Ne), - ) - case wasm.OpcodeVecI8x16LtS: - c.emit( - newOperationV128Cmp(v128CmpTypeI8x16LtS), - ) - case wasm.OpcodeVecI8x16LtU: - c.emit( - newOperationV128Cmp(v128CmpTypeI8x16LtU), - ) - case wasm.OpcodeVecI8x16GtS: - c.emit( - newOperationV128Cmp(v128CmpTypeI8x16GtS), - ) - case wasm.OpcodeVecI8x16GtU: - c.emit( - newOperationV128Cmp(v128CmpTypeI8x16GtU), - ) - case wasm.OpcodeVecI8x16LeS: - c.emit( - newOperationV128Cmp(v128CmpTypeI8x16LeS), - ) - case wasm.OpcodeVecI8x16LeU: - c.emit( - newOperationV128Cmp(v128CmpTypeI8x16LeU), - ) - case wasm.OpcodeVecI8x16GeS: - c.emit( - newOperationV128Cmp(v128CmpTypeI8x16GeS), - ) - case wasm.OpcodeVecI8x16GeU: - c.emit( - newOperationV128Cmp(v128CmpTypeI8x16GeU), - ) - case wasm.OpcodeVecI16x8Eq: - c.emit( - newOperationV128Cmp(v128CmpTypeI16x8Eq), - ) - case wasm.OpcodeVecI16x8Ne: - c.emit( - newOperationV128Cmp(v128CmpTypeI16x8Ne), - ) - case wasm.OpcodeVecI16x8LtS: - c.emit( - newOperationV128Cmp(v128CmpTypeI16x8LtS), - ) - case wasm.OpcodeVecI16x8LtU: - c.emit( - newOperationV128Cmp(v128CmpTypeI16x8LtU), - ) - case wasm.OpcodeVecI16x8GtS: - c.emit( - newOperationV128Cmp(v128CmpTypeI16x8GtS), - ) - case wasm.OpcodeVecI16x8GtU: - c.emit( - newOperationV128Cmp(v128CmpTypeI16x8GtU), - ) - case wasm.OpcodeVecI16x8LeS: - c.emit( - newOperationV128Cmp(v128CmpTypeI16x8LeS), - ) - case wasm.OpcodeVecI16x8LeU: - c.emit( - newOperationV128Cmp(v128CmpTypeI16x8LeU), - ) - case wasm.OpcodeVecI16x8GeS: - c.emit( - newOperationV128Cmp(v128CmpTypeI16x8GeS), - ) - case wasm.OpcodeVecI16x8GeU: - c.emit( - newOperationV128Cmp(v128CmpTypeI16x8GeU), - ) - case wasm.OpcodeVecI32x4Eq: - c.emit( - newOperationV128Cmp(v128CmpTypeI32x4Eq), - ) - case wasm.OpcodeVecI32x4Ne: - c.emit( - newOperationV128Cmp(v128CmpTypeI32x4Ne), - ) - case wasm.OpcodeVecI32x4LtS: - c.emit( - newOperationV128Cmp(v128CmpTypeI32x4LtS), - ) - case wasm.OpcodeVecI32x4LtU: - c.emit( - newOperationV128Cmp(v128CmpTypeI32x4LtU), - ) - case wasm.OpcodeVecI32x4GtS: - c.emit( - newOperationV128Cmp(v128CmpTypeI32x4GtS), - ) - case wasm.OpcodeVecI32x4GtU: - c.emit( - newOperationV128Cmp(v128CmpTypeI32x4GtU), - ) - case wasm.OpcodeVecI32x4LeS: - 
c.emit( - newOperationV128Cmp(v128CmpTypeI32x4LeS), - ) - case wasm.OpcodeVecI32x4LeU: - c.emit( - newOperationV128Cmp(v128CmpTypeI32x4LeU), - ) - case wasm.OpcodeVecI32x4GeS: - c.emit( - newOperationV128Cmp(v128CmpTypeI32x4GeS), - ) - case wasm.OpcodeVecI32x4GeU: - c.emit( - newOperationV128Cmp(v128CmpTypeI32x4GeU), - ) - case wasm.OpcodeVecI64x2Eq: - c.emit( - newOperationV128Cmp(v128CmpTypeI64x2Eq), - ) - case wasm.OpcodeVecI64x2Ne: - c.emit( - newOperationV128Cmp(v128CmpTypeI64x2Ne), - ) - case wasm.OpcodeVecI64x2LtS: - c.emit( - newOperationV128Cmp(v128CmpTypeI64x2LtS), - ) - case wasm.OpcodeVecI64x2GtS: - c.emit( - newOperationV128Cmp(v128CmpTypeI64x2GtS), - ) - case wasm.OpcodeVecI64x2LeS: - c.emit( - newOperationV128Cmp(v128CmpTypeI64x2LeS), - ) - case wasm.OpcodeVecI64x2GeS: - c.emit( - newOperationV128Cmp(v128CmpTypeI64x2GeS), - ) - case wasm.OpcodeVecF32x4Eq: - c.emit( - newOperationV128Cmp(v128CmpTypeF32x4Eq), - ) - case wasm.OpcodeVecF32x4Ne: - c.emit( - newOperationV128Cmp(v128CmpTypeF32x4Ne), - ) - case wasm.OpcodeVecF32x4Lt: - c.emit( - newOperationV128Cmp(v128CmpTypeF32x4Lt), - ) - case wasm.OpcodeVecF32x4Gt: - c.emit( - newOperationV128Cmp(v128CmpTypeF32x4Gt), - ) - case wasm.OpcodeVecF32x4Le: - c.emit( - newOperationV128Cmp(v128CmpTypeF32x4Le), - ) - case wasm.OpcodeVecF32x4Ge: - c.emit( - newOperationV128Cmp(v128CmpTypeF32x4Ge), - ) - case wasm.OpcodeVecF64x2Eq: - c.emit( - newOperationV128Cmp(v128CmpTypeF64x2Eq), - ) - case wasm.OpcodeVecF64x2Ne: - c.emit( - newOperationV128Cmp(v128CmpTypeF64x2Ne), - ) - case wasm.OpcodeVecF64x2Lt: - c.emit( - newOperationV128Cmp(v128CmpTypeF64x2Lt), - ) - case wasm.OpcodeVecF64x2Gt: - c.emit( - newOperationV128Cmp(v128CmpTypeF64x2Gt), - ) - case wasm.OpcodeVecF64x2Le: - c.emit( - newOperationV128Cmp(v128CmpTypeF64x2Le), - ) - case wasm.OpcodeVecF64x2Ge: - c.emit( - newOperationV128Cmp(v128CmpTypeF64x2Ge), - ) - case wasm.OpcodeVecI8x16Neg: - c.emit( - newOperationV128Neg(shapeI8x16), - ) - case wasm.OpcodeVecI16x8Neg: - c.emit( - newOperationV128Neg(shapeI16x8), - ) - case wasm.OpcodeVecI32x4Neg: - c.emit( - newOperationV128Neg(shapeI32x4), - ) - case wasm.OpcodeVecI64x2Neg: - c.emit( - newOperationV128Neg(shapeI64x2), - ) - case wasm.OpcodeVecF32x4Neg: - c.emit( - newOperationV128Neg(shapeF32x4), - ) - case wasm.OpcodeVecF64x2Neg: - c.emit( - newOperationV128Neg(shapeF64x2), - ) - case wasm.OpcodeVecI8x16Add: - c.emit( - newOperationV128Add(shapeI8x16), - ) - case wasm.OpcodeVecI16x8Add: - c.emit( - newOperationV128Add(shapeI16x8), - ) - case wasm.OpcodeVecI32x4Add: - c.emit( - newOperationV128Add(shapeI32x4), - ) - case wasm.OpcodeVecI64x2Add: - c.emit( - newOperationV128Add(shapeI64x2), - ) - case wasm.OpcodeVecF32x4Add: - c.emit( - newOperationV128Add(shapeF32x4), - ) - case wasm.OpcodeVecF64x2Add: - c.emit( - newOperationV128Add(shapeF64x2), - ) - case wasm.OpcodeVecI8x16Sub: - c.emit( - newOperationV128Sub(shapeI8x16), - ) - case wasm.OpcodeVecI16x8Sub: - c.emit( - newOperationV128Sub(shapeI16x8), - ) - case wasm.OpcodeVecI32x4Sub: - c.emit( - newOperationV128Sub(shapeI32x4), - ) - case wasm.OpcodeVecI64x2Sub: - c.emit( - newOperationV128Sub(shapeI64x2), - ) - case wasm.OpcodeVecF32x4Sub: - c.emit( - newOperationV128Sub(shapeF32x4), - ) - case wasm.OpcodeVecF64x2Sub: - c.emit( - newOperationV128Sub(shapeF64x2), - ) - case wasm.OpcodeVecI8x16AddSatS: - c.emit( - newOperationV128AddSat(shapeI8x16, true), - ) - case wasm.OpcodeVecI8x16AddSatU: - c.emit( - newOperationV128AddSat(shapeI8x16, false), - ) - case 
wasm.OpcodeVecI16x8AddSatS: - c.emit( - newOperationV128AddSat(shapeI16x8, true), - ) - case wasm.OpcodeVecI16x8AddSatU: - c.emit( - newOperationV128AddSat(shapeI16x8, false), - ) - case wasm.OpcodeVecI8x16SubSatS: - c.emit( - newOperationV128SubSat(shapeI8x16, true), - ) - case wasm.OpcodeVecI8x16SubSatU: - c.emit( - newOperationV128SubSat(shapeI8x16, false), - ) - case wasm.OpcodeVecI16x8SubSatS: - c.emit( - newOperationV128SubSat(shapeI16x8, true), - ) - case wasm.OpcodeVecI16x8SubSatU: - c.emit( - newOperationV128SubSat(shapeI16x8, false), - ) - case wasm.OpcodeVecI16x8Mul: - c.emit( - newOperationV128Mul(shapeI16x8), - ) - case wasm.OpcodeVecI32x4Mul: - c.emit( - newOperationV128Mul(shapeI32x4), - ) - case wasm.OpcodeVecI64x2Mul: - c.emit( - newOperationV128Mul(shapeI64x2), - ) - case wasm.OpcodeVecF32x4Mul: - c.emit( - newOperationV128Mul(shapeF32x4), - ) - case wasm.OpcodeVecF64x2Mul: - c.emit( - newOperationV128Mul(shapeF64x2), - ) - case wasm.OpcodeVecF32x4Sqrt: - c.emit( - newOperationV128Sqrt(shapeF32x4), - ) - case wasm.OpcodeVecF64x2Sqrt: - c.emit( - newOperationV128Sqrt(shapeF64x2), - ) - case wasm.OpcodeVecF32x4Div: - c.emit( - newOperationV128Div(shapeF32x4), - ) - case wasm.OpcodeVecF64x2Div: - c.emit( - newOperationV128Div(shapeF64x2), - ) - case wasm.OpcodeVecI8x16Abs: - c.emit( - newOperationV128Abs(shapeI8x16), - ) - case wasm.OpcodeVecI8x16Popcnt: - c.emit( - newOperationV128Popcnt(shapeI8x16), - ) - case wasm.OpcodeVecI16x8Abs: - c.emit( - newOperationV128Abs(shapeI16x8), - ) - case wasm.OpcodeVecI32x4Abs: - c.emit( - newOperationV128Abs(shapeI32x4), - ) - case wasm.OpcodeVecI64x2Abs: - c.emit( - newOperationV128Abs(shapeI64x2), - ) - case wasm.OpcodeVecF32x4Abs: - c.emit( - newOperationV128Abs(shapeF32x4), - ) - case wasm.OpcodeVecF64x2Abs: - c.emit( - newOperationV128Abs(shapeF64x2), - ) - case wasm.OpcodeVecI8x16MinS: - c.emit( - newOperationV128Min(shapeI8x16, true), - ) - case wasm.OpcodeVecI8x16MinU: - c.emit( - newOperationV128Min(shapeI8x16, false), - ) - case wasm.OpcodeVecI8x16MaxS: - c.emit( - newOperationV128Max(shapeI8x16, true), - ) - case wasm.OpcodeVecI8x16MaxU: - c.emit( - newOperationV128Max(shapeI8x16, false), - ) - case wasm.OpcodeVecI8x16AvgrU: - c.emit( - newOperationV128AvgrU(shapeI8x16), - ) - case wasm.OpcodeVecI16x8MinS: - c.emit( - newOperationV128Min(shapeI16x8, true), - ) - case wasm.OpcodeVecI16x8MinU: - c.emit( - newOperationV128Min(shapeI16x8, false), - ) - case wasm.OpcodeVecI16x8MaxS: - c.emit( - newOperationV128Max(shapeI16x8, true), - ) - case wasm.OpcodeVecI16x8MaxU: - c.emit( - newOperationV128Max(shapeI16x8, false), - ) - case wasm.OpcodeVecI16x8AvgrU: - c.emit( - newOperationV128AvgrU(shapeI16x8), - ) - case wasm.OpcodeVecI32x4MinS: - c.emit( - newOperationV128Min(shapeI32x4, true), - ) - case wasm.OpcodeVecI32x4MinU: - c.emit( - newOperationV128Min(shapeI32x4, false), - ) - case wasm.OpcodeVecI32x4MaxS: - c.emit( - newOperationV128Max(shapeI32x4, true), - ) - case wasm.OpcodeVecI32x4MaxU: - c.emit( - newOperationV128Max(shapeI32x4, false), - ) - case wasm.OpcodeVecF32x4Min: - c.emit( - newOperationV128Min(shapeF32x4, false), - ) - case wasm.OpcodeVecF32x4Max: - c.emit( - newOperationV128Max(shapeF32x4, false), - ) - case wasm.OpcodeVecF64x2Min: - c.emit( - newOperationV128Min(shapeF64x2, false), - ) - case wasm.OpcodeVecF64x2Max: - c.emit( - newOperationV128Max(shapeF64x2, false), - ) - case wasm.OpcodeVecF32x4Pmin: - c.emit( - newOperationV128Pmin(shapeF32x4), - ) - case wasm.OpcodeVecF32x4Pmax: - c.emit( - 
newOperationV128Pmax(shapeF32x4), - ) - case wasm.OpcodeVecF64x2Pmin: - c.emit( - newOperationV128Pmin(shapeF64x2), - ) - case wasm.OpcodeVecF64x2Pmax: - c.emit( - newOperationV128Pmax(shapeF64x2), - ) - case wasm.OpcodeVecF32x4Ceil: - c.emit( - newOperationV128Ceil(shapeF32x4), - ) - case wasm.OpcodeVecF32x4Floor: - c.emit( - newOperationV128Floor(shapeF32x4), - ) - case wasm.OpcodeVecF32x4Trunc: - c.emit( - newOperationV128Trunc(shapeF32x4), - ) - case wasm.OpcodeVecF32x4Nearest: - c.emit( - newOperationV128Nearest(shapeF32x4), - ) - case wasm.OpcodeVecF64x2Ceil: - c.emit( - newOperationV128Ceil(shapeF64x2), - ) - case wasm.OpcodeVecF64x2Floor: - c.emit( - newOperationV128Floor(shapeF64x2), - ) - case wasm.OpcodeVecF64x2Trunc: - c.emit( - newOperationV128Trunc(shapeF64x2), - ) - case wasm.OpcodeVecF64x2Nearest: - c.emit( - newOperationV128Nearest(shapeF64x2), - ) - case wasm.OpcodeVecI16x8ExtendLowI8x16S: - c.emit( - newOperationV128Extend(shapeI8x16, true, true), - ) - case wasm.OpcodeVecI16x8ExtendHighI8x16S: - c.emit( - newOperationV128Extend(shapeI8x16, true, false), - ) - case wasm.OpcodeVecI16x8ExtendLowI8x16U: - c.emit( - newOperationV128Extend(shapeI8x16, false, true), - ) - case wasm.OpcodeVecI16x8ExtendHighI8x16U: - c.emit( - newOperationV128Extend(shapeI8x16, false, false), - ) - case wasm.OpcodeVecI32x4ExtendLowI16x8S: - c.emit( - newOperationV128Extend(shapeI16x8, true, true), - ) - case wasm.OpcodeVecI32x4ExtendHighI16x8S: - c.emit( - newOperationV128Extend(shapeI16x8, true, false), - ) - case wasm.OpcodeVecI32x4ExtendLowI16x8U: - c.emit( - newOperationV128Extend(shapeI16x8, false, true), - ) - case wasm.OpcodeVecI32x4ExtendHighI16x8U: - c.emit( - newOperationV128Extend(shapeI16x8, false, false), - ) - case wasm.OpcodeVecI64x2ExtendLowI32x4S: - c.emit( - newOperationV128Extend(shapeI32x4, true, true), - ) - case wasm.OpcodeVecI64x2ExtendHighI32x4S: - c.emit( - newOperationV128Extend(shapeI32x4, true, false), - ) - case wasm.OpcodeVecI64x2ExtendLowI32x4U: - c.emit( - newOperationV128Extend(shapeI32x4, false, true), - ) - case wasm.OpcodeVecI64x2ExtendHighI32x4U: - c.emit( - newOperationV128Extend(shapeI32x4, false, false), - ) - case wasm.OpcodeVecI16x8Q15mulrSatS: - c.emit( - newOperationV128Q15mulrSatS(), - ) - case wasm.OpcodeVecI16x8ExtMulLowI8x16S: - c.emit( - newOperationV128ExtMul(shapeI8x16, true, true), - ) - case wasm.OpcodeVecI16x8ExtMulHighI8x16S: - c.emit( - newOperationV128ExtMul(shapeI8x16, true, false), - ) - case wasm.OpcodeVecI16x8ExtMulLowI8x16U: - c.emit( - newOperationV128ExtMul(shapeI8x16, false, true), - ) - case wasm.OpcodeVecI16x8ExtMulHighI8x16U: - c.emit( - newOperationV128ExtMul(shapeI8x16, false, false), - ) - case wasm.OpcodeVecI32x4ExtMulLowI16x8S: - c.emit( - newOperationV128ExtMul(shapeI16x8, true, true), - ) - case wasm.OpcodeVecI32x4ExtMulHighI16x8S: - c.emit( - newOperationV128ExtMul(shapeI16x8, true, false), - ) - case wasm.OpcodeVecI32x4ExtMulLowI16x8U: - c.emit( - newOperationV128ExtMul(shapeI16x8, false, true), - ) - case wasm.OpcodeVecI32x4ExtMulHighI16x8U: - c.emit( - newOperationV128ExtMul(shapeI16x8, false, false), - ) - case wasm.OpcodeVecI64x2ExtMulLowI32x4S: - c.emit( - newOperationV128ExtMul(shapeI32x4, true, true), - ) - case wasm.OpcodeVecI64x2ExtMulHighI32x4S: - c.emit( - newOperationV128ExtMul(shapeI32x4, true, false), - ) - case wasm.OpcodeVecI64x2ExtMulLowI32x4U: - c.emit( - newOperationV128ExtMul(shapeI32x4, false, true), - ) - case wasm.OpcodeVecI64x2ExtMulHighI32x4U: - c.emit( - newOperationV128ExtMul(shapeI32x4, 
false, false), - ) - case wasm.OpcodeVecI16x8ExtaddPairwiseI8x16S: - c.emit( - newOperationV128ExtAddPairwise(shapeI8x16, true), - ) - case wasm.OpcodeVecI16x8ExtaddPairwiseI8x16U: - c.emit( - newOperationV128ExtAddPairwise(shapeI8x16, false), - ) - case wasm.OpcodeVecI32x4ExtaddPairwiseI16x8S: - c.emit( - newOperationV128ExtAddPairwise(shapeI16x8, true), - ) - case wasm.OpcodeVecI32x4ExtaddPairwiseI16x8U: - c.emit( - newOperationV128ExtAddPairwise(shapeI16x8, false), - ) - case wasm.OpcodeVecF64x2PromoteLowF32x4Zero: - c.emit( - newOperationV128FloatPromote(), - ) - case wasm.OpcodeVecF32x4DemoteF64x2Zero: - c.emit( - newOperationV128FloatDemote(), - ) - case wasm.OpcodeVecF32x4ConvertI32x4S: - c.emit( - newOperationV128FConvertFromI(shapeF32x4, true), - ) - case wasm.OpcodeVecF32x4ConvertI32x4U: - c.emit( - newOperationV128FConvertFromI(shapeF32x4, false), - ) - case wasm.OpcodeVecF64x2ConvertLowI32x4S: - c.emit( - newOperationV128FConvertFromI(shapeF64x2, true), - ) - case wasm.OpcodeVecF64x2ConvertLowI32x4U: - c.emit( - newOperationV128FConvertFromI(shapeF64x2, false), - ) - case wasm.OpcodeVecI32x4DotI16x8S: - c.emit( - newOperationV128Dot(), - ) - case wasm.OpcodeVecI8x16NarrowI16x8S: - c.emit( - newOperationV128Narrow(shapeI16x8, true), - ) - case wasm.OpcodeVecI8x16NarrowI16x8U: - c.emit( - newOperationV128Narrow(shapeI16x8, false), - ) - case wasm.OpcodeVecI16x8NarrowI32x4S: - c.emit( - newOperationV128Narrow(shapeI32x4, true), - ) - case wasm.OpcodeVecI16x8NarrowI32x4U: - c.emit( - newOperationV128Narrow(shapeI32x4, false), - ) - case wasm.OpcodeVecI32x4TruncSatF32x4S: - c.emit( - newOperationV128ITruncSatFromF(shapeF32x4, true), - ) - case wasm.OpcodeVecI32x4TruncSatF32x4U: - c.emit( - newOperationV128ITruncSatFromF(shapeF32x4, false), - ) - case wasm.OpcodeVecI32x4TruncSatF64x2SZero: - c.emit( - newOperationV128ITruncSatFromF(shapeF64x2, true), - ) - case wasm.OpcodeVecI32x4TruncSatF64x2UZero: - c.emit( - newOperationV128ITruncSatFromF(shapeF64x2, false), - ) - default: - return fmt.Errorf("unsupported vector instruction in interpreterir: %s", wasm.VectorInstructionName(vecOp)) - } - case wasm.OpcodeAtomicPrefix: - c.pc++ - atomicOp := c.body[c.pc] - switch atomicOp { - case wasm.OpcodeAtomicMemoryWait32: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicMemoryWait32Name) - if err != nil { - return err - } - c.emit( - newOperationAtomicMemoryWait(unsignedTypeI32, imm), - ) - case wasm.OpcodeAtomicMemoryWait64: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicMemoryWait64Name) - if err != nil { - return err - } - c.emit( - newOperationAtomicMemoryWait(unsignedTypeI64, imm), - ) - case wasm.OpcodeAtomicMemoryNotify: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicMemoryNotifyName) - if err != nil { - return err - } - c.emit( - newOperationAtomicMemoryNotify(imm), - ) - case wasm.OpcodeAtomicFence: - // Skip immediate value - c.pc++ - _ = c.body[c.pc] - c.emit( - newOperationAtomicFence(), - ) - case wasm.OpcodeAtomicI32Load: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32LoadName) - if err != nil { - return err - } - c.emit( - newOperationAtomicLoad(unsignedTypeI32, imm), - ) - case wasm.OpcodeAtomicI64Load: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64LoadName) - if err != nil { - return err - } - c.emit( - newOperationAtomicLoad(unsignedTypeI64, imm), - ) - case wasm.OpcodeAtomicI32Load8U: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Load8UName) - if err != nil { - return err - } - c.emit( - newOperationAtomicLoad8(unsignedTypeI32, imm), - ) - case 
wasm.OpcodeAtomicI32Load16U: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Load16UName) - if err != nil { - return err - } - c.emit( - newOperationAtomicLoad16(unsignedTypeI32, imm), - ) - case wasm.OpcodeAtomicI64Load8U: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Load8UName) - if err != nil { - return err - } - c.emit( - newOperationAtomicLoad8(unsignedTypeI64, imm), - ) - case wasm.OpcodeAtomicI64Load16U: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Load16UName) - if err != nil { - return err - } - c.emit( - newOperationAtomicLoad16(unsignedTypeI64, imm), - ) - case wasm.OpcodeAtomicI64Load32U: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Load32UName) - if err != nil { - return err - } - c.emit( - newOperationAtomicLoad(unsignedTypeI32, imm), - ) - case wasm.OpcodeAtomicI32Store: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32StoreName) - if err != nil { - return err - } - c.emit( - newOperationAtomicStore(unsignedTypeI32, imm), - ) - case wasm.OpcodeAtomicI32Store8: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Store8Name) - if err != nil { - return err - } - c.emit( - newOperationAtomicStore8(unsignedTypeI32, imm), - ) - case wasm.OpcodeAtomicI32Store16: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Store16Name) - if err != nil { - return err - } - c.emit( - newOperationAtomicStore16(unsignedTypeI32, imm), - ) - case wasm.OpcodeAtomicI64Store: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64StoreName) - if err != nil { - return err - } - c.emit( - newOperationAtomicStore(unsignedTypeI64, imm), - ) - case wasm.OpcodeAtomicI64Store8: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Store8Name) - if err != nil { - return err - } - c.emit( - newOperationAtomicStore8(unsignedTypeI64, imm), - ) - case wasm.OpcodeAtomicI64Store16: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Store16Name) - if err != nil { - return err - } - c.emit( - newOperationAtomicStore16(unsignedTypeI64, imm), - ) - case wasm.OpcodeAtomicI64Store32: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Store32Name) - if err != nil { - return err - } - c.emit( - newOperationAtomicStore(unsignedTypeI32, imm), - ) - case wasm.OpcodeAtomicI32RmwAdd: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32RmwAddName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpAdd), - ) - case wasm.OpcodeAtomicI64RmwAdd: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64RmwAddName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI64, imm, atomicArithmeticOpAdd), - ) - case wasm.OpcodeAtomicI32Rmw8AddU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw8AddUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW8(unsignedTypeI32, imm, atomicArithmeticOpAdd), - ) - case wasm.OpcodeAtomicI64Rmw8AddU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw8AddUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW8(unsignedTypeI64, imm, atomicArithmeticOpAdd), - ) - case wasm.OpcodeAtomicI32Rmw16AddU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw16AddUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW16(unsignedTypeI32, imm, atomicArithmeticOpAdd), - ) - case wasm.OpcodeAtomicI64Rmw16AddU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw16AddUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW16(unsignedTypeI64, imm, atomicArithmeticOpAdd), - ) - case wasm.OpcodeAtomicI64Rmw32AddU: - imm, err := 
c.readMemoryArg(wasm.OpcodeAtomicI64Rmw32AddUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpAdd), - ) - case wasm.OpcodeAtomicI32RmwSub: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32RmwSubName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpSub), - ) - case wasm.OpcodeAtomicI64RmwSub: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64RmwSubName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI64, imm, atomicArithmeticOpSub), - ) - case wasm.OpcodeAtomicI32Rmw8SubU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw8SubUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW8(unsignedTypeI32, imm, atomicArithmeticOpSub), - ) - case wasm.OpcodeAtomicI64Rmw8SubU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw8SubUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW8(unsignedTypeI64, imm, atomicArithmeticOpSub), - ) - case wasm.OpcodeAtomicI32Rmw16SubU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw16SubUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW16(unsignedTypeI32, imm, atomicArithmeticOpSub), - ) - case wasm.OpcodeAtomicI64Rmw16SubU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw16SubUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW16(unsignedTypeI64, imm, atomicArithmeticOpSub), - ) - case wasm.OpcodeAtomicI64Rmw32SubU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw32SubUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpSub), - ) - case wasm.OpcodeAtomicI32RmwAnd: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32RmwAndName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpAnd), - ) - case wasm.OpcodeAtomicI64RmwAnd: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64RmwAndName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI64, imm, atomicArithmeticOpAnd), - ) - case wasm.OpcodeAtomicI32Rmw8AndU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw8AndUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW8(unsignedTypeI32, imm, atomicArithmeticOpAnd), - ) - case wasm.OpcodeAtomicI64Rmw8AndU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw8AndUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW8(unsignedTypeI64, imm, atomicArithmeticOpAnd), - ) - case wasm.OpcodeAtomicI32Rmw16AndU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw16AndUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW16(unsignedTypeI32, imm, atomicArithmeticOpAnd), - ) - case wasm.OpcodeAtomicI64Rmw16AndU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw16AndUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW16(unsignedTypeI64, imm, atomicArithmeticOpAnd), - ) - case wasm.OpcodeAtomicI64Rmw32AndU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw32AndUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpAnd), - ) - case wasm.OpcodeAtomicI32RmwOr: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32RmwOrName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpOr), - ) - case wasm.OpcodeAtomicI64RmwOr: - imm, err := 
c.readMemoryArg(wasm.OpcodeAtomicI64RmwOrName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI64, imm, atomicArithmeticOpOr), - ) - case wasm.OpcodeAtomicI32Rmw8OrU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw8OrUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW8(unsignedTypeI32, imm, atomicArithmeticOpOr), - ) - case wasm.OpcodeAtomicI64Rmw8OrU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw8OrUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW8(unsignedTypeI64, imm, atomicArithmeticOpOr), - ) - case wasm.OpcodeAtomicI32Rmw16OrU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw16OrUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW16(unsignedTypeI32, imm, atomicArithmeticOpOr), - ) - case wasm.OpcodeAtomicI64Rmw16OrU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw16OrUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW16(unsignedTypeI64, imm, atomicArithmeticOpOr), - ) - case wasm.OpcodeAtomicI64Rmw32OrU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw32OrUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpOr), - ) - case wasm.OpcodeAtomicI32RmwXor: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32RmwXorName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpXor), - ) - case wasm.OpcodeAtomicI64RmwXor: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64RmwXorName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI64, imm, atomicArithmeticOpXor), - ) - case wasm.OpcodeAtomicI32Rmw8XorU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw8XorUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW8(unsignedTypeI32, imm, atomicArithmeticOpXor), - ) - case wasm.OpcodeAtomicI64Rmw8XorU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw8XorUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW8(unsignedTypeI64, imm, atomicArithmeticOpXor), - ) - case wasm.OpcodeAtomicI32Rmw16XorU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw16XorUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW16(unsignedTypeI32, imm, atomicArithmeticOpXor), - ) - case wasm.OpcodeAtomicI64Rmw16XorU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw16XorUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW16(unsignedTypeI64, imm, atomicArithmeticOpXor), - ) - case wasm.OpcodeAtomicI64Rmw32XorU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw32XorUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpXor), - ) - case wasm.OpcodeAtomicI32RmwXchg: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32RmwXchgName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpNop), - ) - case wasm.OpcodeAtomicI64RmwXchg: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64RmwXchgName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI64, imm, atomicArithmeticOpNop), - ) - case wasm.OpcodeAtomicI32Rmw8XchgU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw8XchgUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW8(unsignedTypeI32, imm, atomicArithmeticOpNop), - ) - case wasm.OpcodeAtomicI64Rmw8XchgU: - imm, err := 
c.readMemoryArg(wasm.OpcodeAtomicI64Rmw8XchgUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW8(unsignedTypeI64, imm, atomicArithmeticOpNop), - ) - case wasm.OpcodeAtomicI32Rmw16XchgU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw16XchgUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW16(unsignedTypeI32, imm, atomicArithmeticOpNop), - ) - case wasm.OpcodeAtomicI64Rmw16XchgU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw16XchgUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW16(unsignedTypeI64, imm, atomicArithmeticOpNop), - ) - case wasm.OpcodeAtomicI64Rmw32XchgU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw32XchgUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW(unsignedTypeI32, imm, atomicArithmeticOpNop), - ) - case wasm.OpcodeAtomicI32RmwCmpxchg: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32RmwCmpxchgName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMWCmpxchg(unsignedTypeI32, imm), - ) - case wasm.OpcodeAtomicI64RmwCmpxchg: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64RmwCmpxchgName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMWCmpxchg(unsignedTypeI64, imm), - ) - case wasm.OpcodeAtomicI32Rmw8CmpxchgU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw8CmpxchgUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW8Cmpxchg(unsignedTypeI32, imm), - ) - case wasm.OpcodeAtomicI64Rmw8CmpxchgU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw8CmpxchgUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW8Cmpxchg(unsignedTypeI64, imm), - ) - case wasm.OpcodeAtomicI32Rmw16CmpxchgU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI32Rmw16CmpxchgUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW16Cmpxchg(unsignedTypeI32, imm), - ) - case wasm.OpcodeAtomicI64Rmw16CmpxchgU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw16CmpxchgUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMW16Cmpxchg(unsignedTypeI64, imm), - ) - case wasm.OpcodeAtomicI64Rmw32CmpxchgU: - imm, err := c.readMemoryArg(wasm.OpcodeAtomicI64Rmw32CmpxchgUName) - if err != nil { - return err - } - c.emit( - newOperationAtomicRMWCmpxchg(unsignedTypeI32, imm), - ) - default: - return fmt.Errorf("unsupported atomic instruction in interpreterir: %s", wasm.AtomicInstructionName(atomicOp)) - } - default: - return fmt.Errorf("unsupported instruction in interpreterir: 0x%x", op) - } - - // Move the program counter to point to the next instruction. - c.pc++ - return nil -} - -func (c *compiler) nextFrameID() (id uint32) { - id = c.currentFrameID + 1 - c.currentFrameID++ - return -} - -func (c *compiler) applyToStack(opcode wasm.Opcode) (index uint32, err error) { - switch opcode { - case - // These are the opcodes that are coupled with "index" immediate - // and it DOES affect the signature of opcode. - wasm.OpcodeCall, - wasm.OpcodeCallIndirect, - wasm.OpcodeLocalGet, - wasm.OpcodeLocalSet, - wasm.OpcodeLocalTee, - wasm.OpcodeGlobalGet, - wasm.OpcodeGlobalSet: - // Assumes that we are at the opcode now so skip it before reading immediates. - v, num, err := leb128.LoadUint32(c.body[c.pc+1:]) - if err != nil { - return 0, fmt.Errorf("reading immediates: %w", err) - } - c.pc += num - index = v - default: - // Note that other opcodes are free of index - // as it doesn't affect the signature of the opcode.
- // In other words, the "index" argument of wasmOpcodeSignature - // is ignored there. - } - - if c.unreachableState.on { - return 0, nil - } - - // Retrieve the signature of the opcode. - s, err := c.wasmOpcodeSignature(opcode, index) - if err != nil { - return 0, err - } - - // Manipulate the stack according to the signature. - // Note that the following algorithm assumes that - // the unknown type is unique in the signature, - // and is determined by the actual type on the stack. - // The determined type is stored in this typeParam. - var typeParam unsignedType - var typeParamFound bool - for i := range s.in { - want := s.in[len(s.in)-1-i] - actual := c.stackPop() - if want == unsignedTypeUnknown && typeParamFound { - want = typeParam - } else if want == unsignedTypeUnknown { - want = actual - typeParam = want - typeParamFound = true - } - if want != actual { - return 0, fmt.Errorf("input signature mismatch: want %s but have %s", want, actual) - } - } - - for _, target := range s.out { - if target == unsignedTypeUnknown && !typeParamFound { - return 0, fmt.Errorf("cannot determine type of unknown result") - } else if target == unsignedTypeUnknown { - c.stackPush(typeParam) - } else { - c.stackPush(target) - } - } - - return index, nil -} - -func (c *compiler) stackPeek() (ret unsignedType) { - ret = c.stack[len(c.stack)-1] - return -} - -func (c *compiler) stackSwitchAt(frame *controlFrame) { - c.stack = c.stack[:frame.originalStackLenWithoutParam] - c.stackLenInUint64 = frame.originalStackLenWithoutParamUint64 -} - -func (c *compiler) stackPop() (ret unsignedType) { - // No need to check stack bound - // as we can assume that all the operations - // are valid thanks to validateFunction - // at module validation phase. - ret = c.stack[len(c.stack)-1] - c.stack = c.stack[:len(c.stack)-1] - c.stackLenInUint64 -= 1 + int(unsignedTypeV128&ret>>2) - return -} - -func (c *compiler) stackPush(ts unsignedType) { - c.stack = append(c.stack, ts) - c.stackLenInUint64 += 1 + int(unsignedTypeV128&ts>>2) -} - -// emit adds the operations into the result. -func (c *compiler) emit(op unionOperation) { - if !c.unreachableState.on { - switch op.Kind { - case operationKindDrop: - // If the drop range is nil, - // we could remove such operations. - // That happens when drop operation is unnecessary. - // i.e. when there's no need to adjust stack before jmp. - if int64(op.U1) == -1 { - return - } - } - c.result.Operations = append(c.result.Operations, op) - if c.needSourceOffset { - c.result.IROperationSourceOffsetsInWasmBinary = append(c.result.IROperationSourceOffsetsInWasmBinary, - c.currentOpPC+c.bodyOffsetInCodeSection) - } - } -} - -// Emit const expression with default values of the given type. -func (c *compiler) emitDefaultValue(t wasm.ValueType) { - switch t { - case wasm.ValueTypeI32: - c.stackPush(unsignedTypeI32) - c.emit(newOperationConstI32(0)) - case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref: - c.stackPush(unsignedTypeI64) - c.emit(newOperationConstI64(0)) - case wasm.ValueTypeF32: - c.stackPush(unsignedTypeF32) - c.emit(newOperationConstF32(0)) - case wasm.ValueTypeF64: - c.stackPush(unsignedTypeF64) - c.emit(newOperationConstF64(0)) - case wasm.ValueTypeV128: - c.stackPush(unsignedTypeV128) - c.emit(newOperationV128Const(0, 0)) - } -} - -// Returns the "depth" (starting from top of the stack) -// of the n-th local. 
-func (c *compiler) localDepth(index wasm.Index) int { - height := c.localIndexToStackHeightInUint64[index] - return c.stackLenInUint64 - 1 - height -} - -func (c *compiler) localType(index wasm.Index) (t wasm.ValueType) { - if params := uint32(len(c.sig.Params)); index < params { - t = c.sig.Params[index] - } else { - t = c.localTypes[index-params] - } - return -} - -// getFrameDropRange returns the range (starting from top of the stack) that spans across the (uint64) stack. The range is -// supposed to be dropped from the stack when the given frame exits or is branched into. - // - // * frame is the control frame which the call-site is trying to branch into or exit. - // * isEnd true if the call-site is handling wasm.OpcodeEnd. -func (c *compiler) getFrameDropRange(frame *controlFrame, isEnd bool) inclusiveRange { - var start int - if !isEnd && frame.kind == controlFrameKindLoop { - // If this is not End and the call-site is trying to branch into the Loop control frame, - // we have to start executing from the beginning of the loop block. - // Therefore, we have to pass the inputs to the frame. - start = frame.blockType.ParamNumInUint64 - } else { - start = frame.blockType.ResultNumInUint64 - } - end := c.stackLenInUint64 - 1 - frame.originalStackLenWithoutParamUint64 - if start <= end { - return inclusiveRange{Start: int32(start), End: int32(end)} - } else { - return nopinclusiveRange - } -} - -func (c *compiler) readMemoryArg(tag string) (memoryArg, error) { - c.result.UsesMemory = true - alignment, num, err := leb128.LoadUint32(c.body[c.pc+1:]) - if err != nil { - return memoryArg{}, fmt.Errorf("reading alignment for %s: %w", tag, err) - } - c.pc += num - offset, num, err := leb128.LoadUint32(c.body[c.pc+1:]) - if err != nil { - return memoryArg{}, fmt.Errorf("reading offset for %s: %w", tag, err) - } - c.pc += num - return memoryArg{Offset: offset, Alignment: alignment}, nil -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/format.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/format.go deleted file mode 100644 index 8af1d94b0..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/format.go +++ /dev/null @@ -1,22 +0,0 @@ -package interpreter - -import ( - "bytes" -) - -func format(ops []unionOperation) string { - buf := bytes.NewBuffer(nil) - - _, _ = buf.WriteString(".entrypoint\n") - for i := range ops { - op := &ops[i] - str := op.String() - isLabel := op.Kind == operationKindLabel - if !isLabel { - const indent = "\t" - str = indent + str - } - _, _ = buf.WriteString(str + "\n") - } - return buf.String() -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go deleted file mode 100644 index 5b5e6e9d0..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go +++ /dev/null @@ -1,4596 +0,0 @@ -package interpreter - -import ( - "context" - "encoding/binary" - "errors" - "fmt" - "math" - "math/bits" - "sync" - "unsafe" - - "github.com/tetratelabs/wazero/api" - "github.com/tetratelabs/wazero/experimental" - "github.com/tetratelabs/wazero/internal/expctxkeys" - "github.com/tetratelabs/wazero/internal/filecache" - "github.com/tetratelabs/wazero/internal/internalapi" - "github.com/tetratelabs/wazero/internal/moremath" - "github.com/tetratelabs/wazero/internal/wasm" - "github.com/tetratelabs/wazero/internal/wasmdebug" -
"github.com/tetratelabs/wazero/internal/wasmruntime" -) - -// callStackCeiling is the maximum WebAssembly call frame stack height. This allows wazero to raise -// wasm.ErrCallStackOverflow instead of overflowing the Go runtime. -// -// The default value should suffice for most use cases. Those wishing to change this can via `go build -ldflags`. -var callStackCeiling = 2000 - -// engine is an interpreter implementation of wasm.Engine -type engine struct { - enabledFeatures api.CoreFeatures - compiledFunctions map[wasm.ModuleID][]compiledFunction // guarded by mutex. - mux sync.RWMutex -} - -func NewEngine(_ context.Context, enabledFeatures api.CoreFeatures, _ filecache.Cache) wasm.Engine { - return &engine{ - enabledFeatures: enabledFeatures, - compiledFunctions: map[wasm.ModuleID][]compiledFunction{}, - } -} - -// Close implements the same method as documented on wasm.Engine. -func (e *engine) Close() (err error) { - return -} - -// CompiledModuleCount implements the same method as documented on wasm.Engine. -func (e *engine) CompiledModuleCount() uint32 { - return uint32(len(e.compiledFunctions)) -} - -// DeleteCompiledModule implements the same method as documented on wasm.Engine. -func (e *engine) DeleteCompiledModule(m *wasm.Module) { - e.deleteCompiledFunctions(m) -} - -func (e *engine) deleteCompiledFunctions(module *wasm.Module) { - e.mux.Lock() - defer e.mux.Unlock() - delete(e.compiledFunctions, module.ID) -} - -func (e *engine) addCompiledFunctions(module *wasm.Module, fs []compiledFunction) { - e.mux.Lock() - defer e.mux.Unlock() - e.compiledFunctions[module.ID] = fs -} - -func (e *engine) getCompiledFunctions(module *wasm.Module) (fs []compiledFunction, ok bool) { - e.mux.RLock() - defer e.mux.RUnlock() - fs, ok = e.compiledFunctions[module.ID] - return -} - -// moduleEngine implements wasm.ModuleEngine -type moduleEngine struct { - // codes are the compiled functions in a module instances. - // The index is module instance-scoped. - functions []function - - // parentEngine holds *engine from which this module engine is created from. - parentEngine *engine -} - -// GetGlobalValue implements the same method as documented on wasm.ModuleEngine. -func (e *moduleEngine) GetGlobalValue(wasm.Index) (lo, hi uint64) { - panic("BUG: GetGlobalValue should never be called on interpreter mode") -} - -// SetGlobalValue implements the same method as documented on wasm.ModuleEngine. -func (e *moduleEngine) SetGlobalValue(idx wasm.Index, lo, hi uint64) { - panic("BUG: SetGlobalValue should never be called on interpreter mode") -} - -// OwnsGlobals implements the same method as documented on wasm.ModuleEngine. -func (e *moduleEngine) OwnsGlobals() bool { return false } - -// MemoryGrown implements wasm.ModuleEngine. -func (e *moduleEngine) MemoryGrown() {} - -// callEngine holds context per moduleEngine.Call, and shared across all the -// function calls originating from the same moduleEngine.Call execution. -// -// This implements api.Function. -type callEngine struct { - internalapi.WazeroOnlyType - - // stack contains the operands. - // Note that all the values are represented as uint64. - stack []uint64 - - // frames are the function call stack. - frames []*callFrame - - // f is the initial function for this call engine. - f *function - - // stackiterator for Listeners to walk frames and stack. 
- stackIterator stackIterator -} - -func (e *moduleEngine) newCallEngine(compiled *function) *callEngine { - return &callEngine{f: compiled} -} - -func (ce *callEngine) pushValue(v uint64) { - ce.stack = append(ce.stack, v) -} - -func (ce *callEngine) pushValues(v []uint64) { - ce.stack = append(ce.stack, v...) -} - -func (ce *callEngine) popValue() (v uint64) { - // No need to check stack bound - // as we can assume that all the operations - // are valid thanks to validateFunction - // at module validation phase - // and interpreterir translation - // before compilation. - stackTopIndex := len(ce.stack) - 1 - v = ce.stack[stackTopIndex] - ce.stack = ce.stack[:stackTopIndex] - return -} - -func (ce *callEngine) popValues(v []uint64) { - stackTopIndex := len(ce.stack) - len(v) - copy(v, ce.stack[stackTopIndex:]) - ce.stack = ce.stack[:stackTopIndex] -} - -// peekValues peeks api.ValueType values from the stack and returns them. -func (ce *callEngine) peekValues(count int) []uint64 { - if count == 0 { - return nil - } - stackLen := len(ce.stack) - return ce.stack[stackLen-count : stackLen] -} - -func (ce *callEngine) drop(raw uint64) { - r := inclusiveRangeFromU64(raw) - if r.Start == -1 { - return - } else if r.Start == 0 { - ce.stack = ce.stack[:int32(len(ce.stack))-1-r.End] - } else { - newStack := ce.stack[:int32(len(ce.stack))-1-r.End] - newStack = append(newStack, ce.stack[int32(len(ce.stack))-r.Start:]...) - ce.stack = newStack - } -} - -func (ce *callEngine) pushFrame(frame *callFrame) { - if callStackCeiling <= len(ce.frames) { - panic(wasmruntime.ErrRuntimeStackOverflow) - } - ce.frames = append(ce.frames, frame) -} - -func (ce *callEngine) popFrame() (frame *callFrame) { - // No need to check stack bound as we can assume that all the operations are valid thanks to validateFunction at - // module validation phase and interpreterir translation before compilation. - oneLess := len(ce.frames) - 1 - frame = ce.frames[oneLess] - ce.frames = ce.frames[:oneLess] - return -} - -type callFrame struct { - // pc is the program counter representing the current position in code.body. - pc uint64 - // f is the compiled function used in this function frame. - f *function - // base index in the frame of this function, used to detect the count of - // values on the stack. - base int -} - -type compiledFunction struct { - source *wasm.Module - body []unionOperation - listener experimental.FunctionListener - offsetsInWasmBinary []uint64 - hostFn interface{} - ensureTermination bool - index wasm.Index -} - -type function struct { - funcType *wasm.FunctionType - moduleInstance *wasm.ModuleInstance - typeID wasm.FunctionTypeID - parent *compiledFunction -} - -// functionFromUintptr resurrects the original *function from the given uintptr -// which comes from either funcref table or OpcodeRefFunc instruction. -func functionFromUintptr(ptr uintptr) *function { - // Wraps ptrs as the double pointer in order to avoid the unsafe access as detected by race detector. 
- // - // For example, if we have (*function)(unsafe.Pointer(ptr)) instead, then the race detector's "checkptr" - // subroutine warns as "checkptr: pointer arithmetic result points to invalid allocation" - // https://github.com/golang/go/blob/1ce7fcf139417d618c2730010ede2afb41664211/src/runtime/checkptr.go#L69 - var wrapped *uintptr = &ptr - return *(**function)(unsafe.Pointer(wrapped)) -} - -type snapshot struct { - stack []uint64 - frames []*callFrame - pc uint64 - - ret []uint64 - - ce *callEngine -} - -// Snapshot implements the same method as documented on experimental.Snapshotter. -func (ce *callEngine) Snapshot() experimental.Snapshot { - stack := make([]uint64, len(ce.stack)) - copy(stack, ce.stack) - - frames := make([]*callFrame, len(ce.frames)) - copy(frames, ce.frames) - - return &snapshot{ - stack: stack, - frames: frames, - ce: ce, - } -} - -// Restore implements the same method as documented on experimental.Snapshot. -func (s *snapshot) Restore(ret []uint64) { - s.ret = ret - panic(s) -} - -func (s *snapshot) doRestore() { - ce := s.ce - - ce.stack = s.stack - ce.frames = s.frames - ce.frames[len(ce.frames)-1].pc = s.pc - - copy(ce.stack[len(ce.stack)-len(s.ret):], s.ret) -} - -// Error implements the same method on error. -func (s *snapshot) Error() string { - return "unhandled snapshot restore, this generally indicates restore was called from a different " + - "exported function invocation than snapshot" -} - -// stackIterator implements experimental.StackIterator. -type stackIterator struct { - stack []uint64 - frames []*callFrame - started bool - fn *function - pc uint64 -} - -func (si *stackIterator) reset(stack []uint64, frames []*callFrame, f *function) { - si.fn = f - si.pc = 0 - si.stack = stack - si.frames = frames - si.started = false -} - -func (si *stackIterator) clear() { - si.stack = nil - si.frames = nil - si.started = false - si.fn = nil -} - -// Next implements the same method as documented on experimental.StackIterator. -func (si *stackIterator) Next() bool { - if !si.started { - si.started = true - return true - } - - if len(si.frames) == 0 { - return false - } - - frame := si.frames[len(si.frames)-1] - si.stack = si.stack[:frame.base] - si.fn = frame.f - si.pc = frame.pc - si.frames = si.frames[:len(si.frames)-1] - return true -} - -// Function implements the same method as documented on -// experimental.StackIterator. -func (si *stackIterator) Function() experimental.InternalFunction { - return internalFunction{si.fn} -} - -// ProgramCounter implements the same method as documented on -// experimental.StackIterator. -func (si *stackIterator) ProgramCounter() experimental.ProgramCounter { - return experimental.ProgramCounter(si.pc) -} - -// internalFunction implements experimental.InternalFunction. -type internalFunction struct{ *function } - -// Definition implements the same method as documented on -// experimental.InternalFunction. -func (f internalFunction) Definition() api.FunctionDefinition { - return f.definition() -} - -// SourceOffsetForPC implements the same method as documented on -// experimental.InternalFunction. -func (f internalFunction) SourceOffsetForPC(pc experimental.ProgramCounter) uint64 { - offsetsMap := f.parent.offsetsInWasmBinary - if uint64(pc) < uint64(len(offsetsMap)) { - return offsetsMap[pc] - } - return 0 -} - -// interpreter mode doesn't maintain call frames in the stack, so pass the zero size to the IR. -const callFrameStackSize = 0 - -// CompileModule implements the same method as documented on wasm.Engine.
-func (e *engine) CompileModule(_ context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) error { - if _, ok := e.getCompiledFunctions(module); ok { // cache hit! - return nil - } - - funcs := make([]compiledFunction, len(module.FunctionSection)) - irCompiler, err := newCompiler(e.enabledFeatures, callFrameStackSize, module, ensureTermination) - if err != nil { - return err - } - imported := module.ImportFunctionCount - for i := range module.CodeSection { - var lsn experimental.FunctionListener - if i < len(listeners) { - lsn = listeners[i] - } - - compiled := &funcs[i] - // If this is the host function, there's nothing to do as the runtime representation of - // host function in interpreter is its Go function itself as opposed to Wasm functions, - // which need to be compiled down to - if codeSeg := &module.CodeSection[i]; codeSeg.GoFunc != nil { - compiled.hostFn = codeSeg.GoFunc - } else { - ir, err := irCompiler.Next() - if err != nil { - return err - } - err = e.lowerIR(ir, compiled) - if err != nil { - def := module.FunctionDefinition(uint32(i) + module.ImportFunctionCount) - return fmt.Errorf("failed to lower func[%s] to interpreterir: %w", def.DebugName(), err) - } - } - compiled.source = module - compiled.ensureTermination = ensureTermination - compiled.listener = lsn - compiled.index = imported + uint32(i) - } - e.addCompiledFunctions(module, funcs) - return nil -} - -// NewModuleEngine implements the same method as documented on wasm.Engine. -func (e *engine) NewModuleEngine(module *wasm.Module, instance *wasm.ModuleInstance) (wasm.ModuleEngine, error) { - me := &moduleEngine{ - parentEngine: e, - functions: make([]function, len(module.FunctionSection)+int(module.ImportFunctionCount)), - } - - codes, ok := e.getCompiledFunctions(module) - if !ok { - return nil, errors.New("source module must be compiled before instantiation") - } - - for i := range codes { - c := &codes[i] - offset := i + int(module.ImportFunctionCount) - typeIndex := module.FunctionSection[i] - me.functions[offset] = function{ - moduleInstance: instance, - typeID: instance.TypeIDs[typeIndex], - funcType: &module.TypeSection[typeIndex], - parent: c, - } - } - return me, nil -} - -// lowerIR lowers the interpreterir operations to engine friendly struct. -func (e *engine) lowerIR(ir *compilationResult, ret *compiledFunction) error { - // Copy the body from the result. - ret.body = make([]unionOperation, len(ir.Operations)) - copy(ret.body, ir.Operations) - // Also copy the offsets if necessary. - if offsets := ir.IROperationSourceOffsetsInWasmBinary; len(offsets) > 0 { - ret.offsetsInWasmBinary = make([]uint64, len(offsets)) - copy(ret.offsetsInWasmBinary, offsets) - } - - labelAddressResolutions := [labelKindNum][]uint64{} - - // First, we iterate all labels, and resolve the address. - for i := range ret.body { - op := &ret.body[i] - switch op.Kind { - case operationKindLabel: - label := label(op.U1) - address := uint64(i) - - kind, fid := label.Kind(), label.FrameID() - frameToAddresses := labelAddressResolutions[label.Kind()] - // Expand the slice if necessary. - if diff := fid - len(frameToAddresses) + 1; diff > 0 { - for j := 0; j < diff; j++ { - frameToAddresses = append(frameToAddresses, 0) - } - } - frameToAddresses[fid] = address - labelAddressResolutions[kind] = frameToAddresses - } - } - - // Then resolve the label as the index to the body. 
- for i := range ret.body { - op := &ret.body[i] - switch op.Kind { - case operationKindBr: - e.setLabelAddress(&op.U1, label(op.U1), labelAddressResolutions) - case operationKindBrIf: - e.setLabelAddress(&op.U1, label(op.U1), labelAddressResolutions) - e.setLabelAddress(&op.U2, label(op.U2), labelAddressResolutions) - case operationKindBrTable: - for j := 0; j < len(op.Us); j += 2 { - target := op.Us[j] - e.setLabelAddress(&op.Us[j], label(target), labelAddressResolutions) - } - } - } - return nil -} - -func (e *engine) setLabelAddress(op *uint64, label label, labelAddressResolutions [labelKindNum][]uint64) { - if label.IsReturnTarget() { - // Jmp to the end of the possible binary. - *op = math.MaxUint64 - } else { - *op = labelAddressResolutions[label.Kind()][label.FrameID()] - } -} - -// ResolveImportedFunction implements wasm.ModuleEngine. -func (e *moduleEngine) ResolveImportedFunction(index, descFunc, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) { - imported := importedModuleEngine.(*moduleEngine) - e.functions[index] = imported.functions[indexInImportedModule] -} - -// ResolveImportedMemory implements wasm.ModuleEngine. -func (e *moduleEngine) ResolveImportedMemory(wasm.ModuleEngine) {} - -// DoneInstantiation implements wasm.ModuleEngine. -func (e *moduleEngine) DoneInstantiation() {} - -// FunctionInstanceReference implements the same method as documented on wasm.ModuleEngine. -func (e *moduleEngine) FunctionInstanceReference(funcIndex wasm.Index) wasm.Reference { - return uintptr(unsafe.Pointer(&e.functions[funcIndex])) -} - -// NewFunction implements the same method as documented on wasm.ModuleEngine. -func (e *moduleEngine) NewFunction(index wasm.Index) (ce api.Function) { - // Note: The input parameters are pre-validated, so a compiled function is only absent on close. Updates to - // code on close aren't locked, neither is this read. - compiled := &e.functions[index] - return e.newCallEngine(compiled) -} - -// LookupFunction implements the same method as documented on wasm.ModuleEngine. -func (e *moduleEngine) LookupFunction(t *wasm.TableInstance, typeId wasm.FunctionTypeID, tableOffset wasm.Index) (*wasm.ModuleInstance, wasm.Index) { - if tableOffset >= uint32(len(t.References)) { - panic(wasmruntime.ErrRuntimeInvalidTableAccess) - } - rawPtr := t.References[tableOffset] - if rawPtr == 0 { - panic(wasmruntime.ErrRuntimeInvalidTableAccess) - } - - tf := functionFromUintptr(rawPtr) - if tf.typeID != typeId { - panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch) - } - return tf.moduleInstance, tf.parent.index -} - -// Definition implements the same method as documented on api.Function. -func (ce *callEngine) Definition() api.FunctionDefinition { - return ce.f.definition() -} - -func (f *function) definition() api.FunctionDefinition { - compiled := f.parent - return compiled.source.FunctionDefinition(compiled.index) -} - -// Call implements the same method as documented on api.Function. -func (ce *callEngine) Call(ctx context.Context, params ...uint64) (results []uint64, err error) { - ft := ce.f.funcType - if n := ft.ParamNumInUint64; n != len(params) { - return nil, fmt.Errorf("expected %d params, but passed %d", n, len(params)) - } - return ce.call(ctx, params, nil) -} - -// CallWithStack implements the same method as documented on api.Function. 
-func (ce *callEngine) CallWithStack(ctx context.Context, stack []uint64) error { - params, results, err := wasm.SplitCallStack(ce.f.funcType, stack) - if err != nil { - return err - } - _, err = ce.call(ctx, params, results) - return err -} - -func (ce *callEngine) call(ctx context.Context, params, results []uint64) (_ []uint64, err error) { - m := ce.f.moduleInstance - if ce.f.parent.ensureTermination { - select { - case <-ctx.Done(): - // If the provided context is already done, close the call context - // and return the error. - m.CloseWithCtxErr(ctx) - return nil, m.FailIfClosed() - default: - } - } - - if ctx.Value(expctxkeys.EnableSnapshotterKey{}) != nil { - ctx = context.WithValue(ctx, expctxkeys.SnapshotterKey{}, ce) - } - - defer func() { - // If the module closed during the call, and the call didn't err for another reason, set an ExitError. - if err == nil { - err = m.FailIfClosed() - } - // TODO: ^^ Will not fail if the function was imported from a closed module. - - if v := recover(); v != nil { - err = ce.recoverOnCall(ctx, m, v) - } - }() - - ce.pushValues(params) - - if ce.f.parent.ensureTermination { - done := m.CloseModuleOnCanceledOrTimeout(ctx) - defer done() - } - - ce.callFunction(ctx, m, ce.f) - - // This returns a safe copy of the results, instead of a slice view. If we - // returned a re-slice, the caller could accidentally or purposefully - // corrupt the stack of subsequent calls. - ft := ce.f.funcType - if results == nil && ft.ResultNumInUint64 > 0 { - results = make([]uint64, ft.ResultNumInUint64) - } - ce.popValues(results) - return results, nil -} - -// functionListenerInvocation captures arguments needed to perform function -// listener invocations when unwinding the call stack. -type functionListenerInvocation struct { - experimental.FunctionListener - def api.FunctionDefinition -} - -// recoverOnCall takes the recovered value `recoverOnCall`, and wraps it -// with the call frame stack traces. Also, reset the state of callEngine -// so that it can be used for the subsequent calls. -func (ce *callEngine) recoverOnCall(ctx context.Context, m *wasm.ModuleInstance, v interface{}) (err error) { - if s, ok := v.(*snapshot); ok { - // A snapshot that wasn't handled was created by a different call engine possibly from a nested wasm invocation, - // let it propagate up to be handled by the caller. - panic(s) - } - - builder := wasmdebug.NewErrorBuilder() - frameCount := len(ce.frames) - functionListeners := make([]functionListenerInvocation, 0, 16) - - if frameCount > wasmdebug.MaxFrames { - frameCount = wasmdebug.MaxFrames - } - for i := 0; i < frameCount; i++ { - frame := ce.popFrame() - f := frame.f - def := f.definition() - var sources []string - if parent := frame.f.parent; parent.body != nil && len(parent.offsetsInWasmBinary) > 0 { - sources = parent.source.DWARFLines.Line(parent.offsetsInWasmBinary[frame.pc]) - } - builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sources) - if f.parent.listener != nil { - functionListeners = append(functionListeners, functionListenerInvocation{ - FunctionListener: f.parent.listener, - def: f.definition(), - }) - } - } - - err = builder.FromRecovered(v) - for i := range functionListeners { - functionListeners[i].Abort(ctx, m, functionListeners[i].def, err) - } - - // Allows the reuse of CallEngine. 
- ce.stack, ce.frames = ce.stack[:0], ce.frames[:0] - return -} - -func (ce *callEngine) callFunction(ctx context.Context, m *wasm.ModuleInstance, f *function) { - if f.parent.hostFn != nil { - ce.callGoFuncWithStack(ctx, m, f) - } else if lsn := f.parent.listener; lsn != nil { - ce.callNativeFuncWithListener(ctx, m, f, lsn) - } else { - ce.callNativeFunc(ctx, m, f) - } -} - -func (ce *callEngine) callGoFunc(ctx context.Context, m *wasm.ModuleInstance, f *function, stack []uint64) { - typ := f.funcType - lsn := f.parent.listener - if lsn != nil { - params := stack[:typ.ParamNumInUint64] - ce.stackIterator.reset(ce.stack, ce.frames, f) - lsn.Before(ctx, m, f.definition(), params, &ce.stackIterator) - ce.stackIterator.clear() - } - frame := &callFrame{f: f, base: len(ce.stack)} - ce.pushFrame(frame) - - fn := f.parent.hostFn - switch fn := fn.(type) { - case api.GoModuleFunction: - fn.Call(ctx, m, stack) - case api.GoFunction: - fn.Call(ctx, stack) - } - - ce.popFrame() - if lsn != nil { - // TODO: This doesn't get the error due to use of panic to propagate them. - results := stack[:typ.ResultNumInUint64] - lsn.After(ctx, m, f.definition(), results) - } -} - -func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance, f *function) { - frame := &callFrame{f: f, base: len(ce.stack)} - moduleInst := f.moduleInstance - functions := moduleInst.Engine.(*moduleEngine).functions - memoryInst := moduleInst.MemoryInstance - globals := moduleInst.Globals - tables := moduleInst.Tables - typeIDs := moduleInst.TypeIDs - dataInstances := moduleInst.DataInstances - elementInstances := moduleInst.ElementInstances - ce.pushFrame(frame) - body := frame.f.parent.body - bodyLen := uint64(len(body)) - for frame.pc < bodyLen { - op := &body[frame.pc] - // TODO: add description of each operation/case - // on, for example, how many args are used, - // how the stack is modified, etc. - switch op.Kind { - case operationKindBuiltinFunctionCheckExitCode: - if err := m.FailIfClosed(); err != nil { - panic(err) - } - frame.pc++ - case operationKindUnreachable: - panic(wasmruntime.ErrRuntimeUnreachable) - case operationKindBr: - frame.pc = op.U1 - case operationKindBrIf: - if ce.popValue() > 0 { - ce.drop(op.U3) - frame.pc = op.U1 - } else { - frame.pc = op.U2 - } - case operationKindBrTable: - v := ce.popValue() - defaultAt := uint64(len(op.Us))/2 - 1 - if v > defaultAt { - v = defaultAt - } - v *= 2 - ce.drop(op.Us[v+1]) - frame.pc = op.Us[v] - case operationKindCall: - func() { - if ctx.Value(expctxkeys.EnableSnapshotterKey{}) != nil { - defer func() { - if r := recover(); r != nil { - if s, ok := r.(*snapshot); ok && s.ce == ce { - s.doRestore() - frame = ce.frames[len(ce.frames)-1] - body = frame.f.parent.body - bodyLen = uint64(len(body)) - } else { - panic(r) - } - } - }() - } - ce.callFunction(ctx, f.moduleInstance, &functions[op.U1]) - }() - frame.pc++ - case operationKindCallIndirect: - offset := ce.popValue() - table := tables[op.U2] - if offset >= uint64(len(table.References)) { - panic(wasmruntime.ErrRuntimeInvalidTableAccess) - } - rawPtr := table.References[offset] - if rawPtr == 0 { - panic(wasmruntime.ErrRuntimeInvalidTableAccess) - } - - tf := functionFromUintptr(rawPtr) - if tf.typeID != typeIDs[op.U1] { - panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch) - } - - ce.callFunction(ctx, f.moduleInstance, tf) - frame.pc++ - case operationKindDrop: - ce.drop(op.U1) - frame.pc++ - case operationKindSelect: - c := ce.popValue() - if op.B3 { // Target is vector. 
- x2Hi, x2Lo := ce.popValue(), ce.popValue() - if c == 0 { - _, _ = ce.popValue(), ce.popValue() // discard the x1's lo and hi bits. - ce.pushValue(x2Lo) - ce.pushValue(x2Hi) - } - } else { - v2 := ce.popValue() - if c == 0 { - _ = ce.popValue() - ce.pushValue(v2) - } - } - frame.pc++ - case operationKindPick: - index := len(ce.stack) - 1 - int(op.U1) - ce.pushValue(ce.stack[index]) - if op.B3 { // V128 value target. - ce.pushValue(ce.stack[index+1]) - } - frame.pc++ - case operationKindSet: - if op.B3 { // V128 value target. - lowIndex := len(ce.stack) - 1 - int(op.U1) - highIndex := lowIndex + 1 - hi, lo := ce.popValue(), ce.popValue() - ce.stack[lowIndex], ce.stack[highIndex] = lo, hi - } else { - index := len(ce.stack) - 1 - int(op.U1) - ce.stack[index] = ce.popValue() - } - frame.pc++ - case operationKindGlobalGet: - g := globals[op.U1] - ce.pushValue(g.Val) - if g.Type.ValType == wasm.ValueTypeV128 { - ce.pushValue(g.ValHi) - } - frame.pc++ - case operationKindGlobalSet: - g := globals[op.U1] - if g.Type.ValType == wasm.ValueTypeV128 { - g.ValHi = ce.popValue() - } - g.Val = ce.popValue() - frame.pc++ - case operationKindLoad: - offset := ce.popMemoryOffset(op) - switch unsignedType(op.B1) { - case unsignedTypeI32, unsignedTypeF32: - if val, ok := memoryInst.ReadUint32Le(offset); !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } else { - ce.pushValue(uint64(val)) - } - case unsignedTypeI64, unsignedTypeF64: - if val, ok := memoryInst.ReadUint64Le(offset); !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } else { - ce.pushValue(val) - } - } - frame.pc++ - case operationKindLoad8: - val, ok := memoryInst.ReadByte(ce.popMemoryOffset(op)) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - - switch signedInt(op.B1) { - case signedInt32: - ce.pushValue(uint64(uint32(int8(val)))) - case signedInt64: - ce.pushValue(uint64(int8(val))) - case signedUint32, signedUint64: - ce.pushValue(uint64(val)) - } - frame.pc++ - case operationKindLoad16: - - val, ok := memoryInst.ReadUint16Le(ce.popMemoryOffset(op)) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - - switch signedInt(op.B1) { - case signedInt32: - ce.pushValue(uint64(uint32(int16(val)))) - case signedInt64: - ce.pushValue(uint64(int16(val))) - case signedUint32, signedUint64: - ce.pushValue(uint64(val)) - } - frame.pc++ - case operationKindLoad32: - val, ok := memoryInst.ReadUint32Le(ce.popMemoryOffset(op)) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - - if op.B1 == 1 { // Signed - ce.pushValue(uint64(int32(val))) - } else { - ce.pushValue(uint64(val)) - } - frame.pc++ - case operationKindStore: - val := ce.popValue() - offset := ce.popMemoryOffset(op) - switch unsignedType(op.B1) { - case unsignedTypeI32, unsignedTypeF32: - if !memoryInst.WriteUint32Le(offset, uint32(val)) { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - case unsignedTypeI64, unsignedTypeF64: - if !memoryInst.WriteUint64Le(offset, val) { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - } - frame.pc++ - case operationKindStore8: - val := byte(ce.popValue()) - offset := ce.popMemoryOffset(op) - if !memoryInst.WriteByte(offset, val) { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - frame.pc++ - case operationKindStore16: - val := uint16(ce.popValue()) - offset := ce.popMemoryOffset(op) - if !memoryInst.WriteUint16Le(offset, val) { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - frame.pc++ - case operationKindStore32: - val := 
uint32(ce.popValue()) - offset := ce.popMemoryOffset(op) - if !memoryInst.WriteUint32Le(offset, val) { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - frame.pc++ - case operationKindMemorySize: - ce.pushValue(uint64(memoryInst.Pages())) - frame.pc++ - case operationKindMemoryGrow: - n := ce.popValue() - if res, ok := memoryInst.Grow(uint32(n)); !ok { - ce.pushValue(uint64(0xffffffff)) // = -1 in signed 32-bit integer. - } else { - ce.pushValue(uint64(res)) - } - frame.pc++ - case operationKindConstI32, operationKindConstI64, - operationKindConstF32, operationKindConstF64: - ce.pushValue(op.U1) - frame.pc++ - case operationKindEq: - var b bool - switch unsignedType(op.B1) { - case unsignedTypeI32: - v2, v1 := ce.popValue(), ce.popValue() - b = uint32(v1) == uint32(v2) - case unsignedTypeI64: - v2, v1 := ce.popValue(), ce.popValue() - b = v1 == v2 - case unsignedTypeF32: - v2, v1 := ce.popValue(), ce.popValue() - b = math.Float32frombits(uint32(v2)) == math.Float32frombits(uint32(v1)) - case unsignedTypeF64: - v2, v1 := ce.popValue(), ce.popValue() - b = math.Float64frombits(v2) == math.Float64frombits(v1) - } - if b { - ce.pushValue(1) - } else { - ce.pushValue(0) - } - frame.pc++ - case operationKindNe: - var b bool - switch unsignedType(op.B1) { - case unsignedTypeI32, unsignedTypeI64: - v2, v1 := ce.popValue(), ce.popValue() - b = v1 != v2 - case unsignedTypeF32: - v2, v1 := ce.popValue(), ce.popValue() - b = math.Float32frombits(uint32(v2)) != math.Float32frombits(uint32(v1)) - case unsignedTypeF64: - v2, v1 := ce.popValue(), ce.popValue() - b = math.Float64frombits(v2) != math.Float64frombits(v1) - } - if b { - ce.pushValue(1) - } else { - ce.pushValue(0) - } - frame.pc++ - case operationKindEqz: - if ce.popValue() == 0 { - ce.pushValue(1) - } else { - ce.pushValue(0) - } - frame.pc++ - case operationKindLt: - v2 := ce.popValue() - v1 := ce.popValue() - var b bool - switch signedType(op.B1) { - case signedTypeInt32: - b = int32(v1) < int32(v2) - case signedTypeInt64: - b = int64(v1) < int64(v2) - case signedTypeUint32, signedTypeUint64: - b = v1 < v2 - case signedTypeFloat32: - b = math.Float32frombits(uint32(v1)) < math.Float32frombits(uint32(v2)) - case signedTypeFloat64: - b = math.Float64frombits(v1) < math.Float64frombits(v2) - } - if b { - ce.pushValue(1) - } else { - ce.pushValue(0) - } - frame.pc++ - case operationKindGt: - v2 := ce.popValue() - v1 := ce.popValue() - var b bool - switch signedType(op.B1) { - case signedTypeInt32: - b = int32(v1) > int32(v2) - case signedTypeInt64: - b = int64(v1) > int64(v2) - case signedTypeUint32, signedTypeUint64: - b = v1 > v2 - case signedTypeFloat32: - b = math.Float32frombits(uint32(v1)) > math.Float32frombits(uint32(v2)) - case signedTypeFloat64: - b = math.Float64frombits(v1) > math.Float64frombits(v2) - } - if b { - ce.pushValue(1) - } else { - ce.pushValue(0) - } - frame.pc++ - case operationKindLe: - v2 := ce.popValue() - v1 := ce.popValue() - var b bool - switch signedType(op.B1) { - case signedTypeInt32: - b = int32(v1) <= int32(v2) - case signedTypeInt64: - b = int64(v1) <= int64(v2) - case signedTypeUint32, signedTypeUint64: - b = v1 <= v2 - case signedTypeFloat32: - b = math.Float32frombits(uint32(v1)) <= math.Float32frombits(uint32(v2)) - case signedTypeFloat64: - b = math.Float64frombits(v1) <= math.Float64frombits(v2) - } - if b { - ce.pushValue(1) - } else { - ce.pushValue(0) - } - frame.pc++ - case operationKindGe: - v2 := ce.popValue() - v1 := ce.popValue() - var b bool - switch signedType(op.B1) { - case 
signedTypeInt32: - b = int32(v1) >= int32(v2) - case signedTypeInt64: - b = int64(v1) >= int64(v2) - case signedTypeUint32, signedTypeUint64: - b = v1 >= v2 - case signedTypeFloat32: - b = math.Float32frombits(uint32(v1)) >= math.Float32frombits(uint32(v2)) - case signedTypeFloat64: - b = math.Float64frombits(v1) >= math.Float64frombits(v2) - } - if b { - ce.pushValue(1) - } else { - ce.pushValue(0) - } - frame.pc++ - case operationKindAdd: - v2 := ce.popValue() - v1 := ce.popValue() - switch unsignedType(op.B1) { - case unsignedTypeI32: - v := uint32(v1) + uint32(v2) - ce.pushValue(uint64(v)) - case unsignedTypeI64: - ce.pushValue(v1 + v2) - case unsignedTypeF32: - ce.pushValue(addFloat32bits(uint32(v1), uint32(v2))) - case unsignedTypeF64: - v := math.Float64frombits(v1) + math.Float64frombits(v2) - ce.pushValue(math.Float64bits(v)) - } - frame.pc++ - case operationKindSub: - v2 := ce.popValue() - v1 := ce.popValue() - switch unsignedType(op.B1) { - case unsignedTypeI32: - ce.pushValue(uint64(uint32(v1) - uint32(v2))) - case unsignedTypeI64: - ce.pushValue(v1 - v2) - case unsignedTypeF32: - ce.pushValue(subFloat32bits(uint32(v1), uint32(v2))) - case unsignedTypeF64: - v := math.Float64frombits(v1) - math.Float64frombits(v2) - ce.pushValue(math.Float64bits(v)) - } - frame.pc++ - case operationKindMul: - v2 := ce.popValue() - v1 := ce.popValue() - switch unsignedType(op.B1) { - case unsignedTypeI32: - ce.pushValue(uint64(uint32(v1) * uint32(v2))) - case unsignedTypeI64: - ce.pushValue(v1 * v2) - case unsignedTypeF32: - ce.pushValue(mulFloat32bits(uint32(v1), uint32(v2))) - case unsignedTypeF64: - v := math.Float64frombits(v2) * math.Float64frombits(v1) - ce.pushValue(math.Float64bits(v)) - } - frame.pc++ - case operationKindClz: - v := ce.popValue() - if op.B1 == 0 { - // unsignedInt32 - ce.pushValue(uint64(bits.LeadingZeros32(uint32(v)))) - } else { - // unsignedInt64 - ce.pushValue(uint64(bits.LeadingZeros64(v))) - } - frame.pc++ - case operationKindCtz: - v := ce.popValue() - if op.B1 == 0 { - // unsignedInt32 - ce.pushValue(uint64(bits.TrailingZeros32(uint32(v)))) - } else { - // unsignedInt64 - ce.pushValue(uint64(bits.TrailingZeros64(v))) - } - frame.pc++ - case operationKindPopcnt: - v := ce.popValue() - if op.B1 == 0 { - // unsignedInt32 - ce.pushValue(uint64(bits.OnesCount32(uint32(v)))) - } else { - // unsignedInt64 - ce.pushValue(uint64(bits.OnesCount64(v))) - } - frame.pc++ - case operationKindDiv: - // If an integer, check we won't divide by zero. 
- t := signedType(op.B1) - v2, v1 := ce.popValue(), ce.popValue() - switch t { - case signedTypeFloat32, signedTypeFloat64: // not integers - default: - if v2 == 0 { - panic(wasmruntime.ErrRuntimeIntegerDivideByZero) - } - } - - switch t { - case signedTypeInt32: - d := int32(v2) - n := int32(v1) - if n == math.MinInt32 && d == -1 { - panic(wasmruntime.ErrRuntimeIntegerOverflow) - } - ce.pushValue(uint64(uint32(n / d))) - case signedTypeInt64: - d := int64(v2) - n := int64(v1) - if n == math.MinInt64 && d == -1 { - panic(wasmruntime.ErrRuntimeIntegerOverflow) - } - ce.pushValue(uint64(n / d)) - case signedTypeUint32: - d := uint32(v2) - n := uint32(v1) - ce.pushValue(uint64(n / d)) - case signedTypeUint64: - d := v2 - n := v1 - ce.pushValue(n / d) - case signedTypeFloat32: - ce.pushValue(divFloat32bits(uint32(v1), uint32(v2))) - case signedTypeFloat64: - ce.pushValue(math.Float64bits(math.Float64frombits(v1) / math.Float64frombits(v2))) - } - frame.pc++ - case operationKindRem: - v2, v1 := ce.popValue(), ce.popValue() - if v2 == 0 { - panic(wasmruntime.ErrRuntimeIntegerDivideByZero) - } - switch signedInt(op.B1) { - case signedInt32: - d := int32(v2) - n := int32(v1) - ce.pushValue(uint64(uint32(n % d))) - case signedInt64: - d := int64(v2) - n := int64(v1) - ce.pushValue(uint64(n % d)) - case signedUint32: - d := uint32(v2) - n := uint32(v1) - ce.pushValue(uint64(n % d)) - case signedUint64: - d := v2 - n := v1 - ce.pushValue(n % d) - } - frame.pc++ - case operationKindAnd: - v2 := ce.popValue() - v1 := ce.popValue() - if op.B1 == 0 { - // unsignedInt32 - ce.pushValue(uint64(uint32(v2) & uint32(v1))) - } else { - // unsignedInt64 - ce.pushValue(uint64(v2 & v1)) - } - frame.pc++ - case operationKindOr: - v2 := ce.popValue() - v1 := ce.popValue() - if op.B1 == 0 { - // unsignedInt32 - ce.pushValue(uint64(uint32(v2) | uint32(v1))) - } else { - // unsignedInt64 - ce.pushValue(uint64(v2 | v1)) - } - frame.pc++ - case operationKindXor: - v2 := ce.popValue() - v1 := ce.popValue() - if op.B1 == 0 { - // unsignedInt32 - ce.pushValue(uint64(uint32(v2) ^ uint32(v1))) - } else { - // unsignedInt64 - ce.pushValue(uint64(v2 ^ v1)) - } - frame.pc++ - case operationKindShl: - v2 := ce.popValue() - v1 := ce.popValue() - if op.B1 == 0 { - // unsignedInt32 - ce.pushValue(uint64(uint32(v1) << (uint32(v2) % 32))) - } else { - // unsignedInt64 - ce.pushValue(v1 << (v2 % 64)) - } - frame.pc++ - case operationKindShr: - v2 := ce.popValue() - v1 := ce.popValue() - switch signedInt(op.B1) { - case signedInt32: - ce.pushValue(uint64(uint32(int32(v1) >> (uint32(v2) % 32)))) - case signedInt64: - ce.pushValue(uint64(int64(v1) >> (v2 % 64))) - case signedUint32: - ce.pushValue(uint64(uint32(v1) >> (uint32(v2) % 32))) - case signedUint64: - ce.pushValue(v1 >> (v2 % 64)) - } - frame.pc++ - case operationKindRotl: - v2 := ce.popValue() - v1 := ce.popValue() - if op.B1 == 0 { - // unsignedInt32 - ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), int(v2)))) - } else { - // unsignedInt64 - ce.pushValue(uint64(bits.RotateLeft64(v1, int(v2)))) - } - frame.pc++ - case operationKindRotr: - v2 := ce.popValue() - v1 := ce.popValue() - if op.B1 == 0 { - // unsignedInt32 - ce.pushValue(uint64(bits.RotateLeft32(uint32(v1), -int(v2)))) - } else { - // unsignedInt64 - ce.pushValue(uint64(bits.RotateLeft64(v1, -int(v2)))) - } - frame.pc++ - case operationKindAbs: - if op.B1 == 0 { - // float32 - const mask uint32 = 1 << 31 - ce.pushValue(uint64(uint32(ce.popValue()) &^ mask)) - } else { - // float64 - const mask uint64 = 1 << 63 - 
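The Div case above traps separately on the two conditions the Wasm spec singles out for signed integer division: a zero divisor, and MinInt / -1, whose quotient is not representable in two's complement. A minimal standalone sketch of the i32.div_s rule follows; wasmDivS32 and the error values are illustrative names, not wazero APIs.

package main

import (
	"errors"
	"fmt"
	"math"
)

var (
	errDivideByZero    = errors.New("integer divide by zero")
	errIntegerOverflow = errors.New("integer overflow")
)

// wasmDivS32 mirrors i32.div_s semantics: trap on n/0 and on MinInt32 / -1,
// the one signed case whose result does not fit in int32.
func wasmDivS32(n, d int32) (int32, error) {
	if d == 0 {
		return 0, errDivideByZero
	}
	if n == math.MinInt32 && d == -1 {
		return 0, errIntegerOverflow
	}
	return n / d, nil
}

func main() {
	fmt.Println(wasmDivS32(-8, 2))             // -4 <nil>
	fmt.Println(wasmDivS32(math.MinInt32, -1)) // 0 integer overflow
}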
ce.pushValue(ce.popValue() &^ mask) - } - frame.pc++ - case operationKindNeg: - if op.B1 == 0 { - // float32 - v := -math.Float32frombits(uint32(ce.popValue())) - ce.pushValue(uint64(math.Float32bits(v))) - } else { - // float64 - v := -math.Float64frombits(ce.popValue()) - ce.pushValue(math.Float64bits(v)) - } - frame.pc++ - case operationKindCeil: - if op.B1 == 0 { - // float32 - v := moremath.WasmCompatCeilF32(math.Float32frombits(uint32(ce.popValue()))) - ce.pushValue(uint64(math.Float32bits(v))) - } else { - // float64 - v := moremath.WasmCompatCeilF64(math.Float64frombits(ce.popValue())) - ce.pushValue(math.Float64bits(v)) - } - frame.pc++ - case operationKindFloor: - if op.B1 == 0 { - // float32 - v := moremath.WasmCompatFloorF32(math.Float32frombits(uint32(ce.popValue()))) - ce.pushValue(uint64(math.Float32bits(v))) - } else { - // float64 - v := moremath.WasmCompatFloorF64(math.Float64frombits(ce.popValue())) - ce.pushValue(math.Float64bits(v)) - } - frame.pc++ - case operationKindTrunc: - if op.B1 == 0 { - // float32 - v := moremath.WasmCompatTruncF32(math.Float32frombits(uint32(ce.popValue()))) - ce.pushValue(uint64(math.Float32bits(v))) - } else { - // float64 - v := moremath.WasmCompatTruncF64(math.Float64frombits(ce.popValue())) - ce.pushValue(math.Float64bits(v)) - } - frame.pc++ - case operationKindNearest: - if op.B1 == 0 { - // float32 - f := math.Float32frombits(uint32(ce.popValue())) - ce.pushValue(uint64(math.Float32bits(moremath.WasmCompatNearestF32(f)))) - } else { - // float64 - f := math.Float64frombits(ce.popValue()) - ce.pushValue(math.Float64bits(moremath.WasmCompatNearestF64(f))) - } - frame.pc++ - case operationKindSqrt: - if op.B1 == 0 { - // float32 - v := math.Sqrt(float64(math.Float32frombits(uint32(ce.popValue())))) - ce.pushValue(uint64(math.Float32bits(float32(v)))) - } else { - // float64 - v := math.Sqrt(math.Float64frombits(ce.popValue())) - ce.pushValue(math.Float64bits(v)) - } - frame.pc++ - case operationKindMin: - if op.B1 == 0 { - // float32 - ce.pushValue(wasmCompatMin32bits(uint32(ce.popValue()), uint32(ce.popValue()))) - } else { - v2 := math.Float64frombits(ce.popValue()) - v1 := math.Float64frombits(ce.popValue()) - ce.pushValue(math.Float64bits(moremath.WasmCompatMin64(v1, v2))) - } - frame.pc++ - case operationKindMax: - if op.B1 == 0 { - ce.pushValue(wasmCompatMax32bits(uint32(ce.popValue()), uint32(ce.popValue()))) - } else { - // float64 - v2 := math.Float64frombits(ce.popValue()) - v1 := math.Float64frombits(ce.popValue()) - ce.pushValue(math.Float64bits(moremath.WasmCompatMax64(v1, v2))) - } - frame.pc++ - case operationKindCopysign: - if op.B1 == 0 { - // float32 - v2 := uint32(ce.popValue()) - v1 := uint32(ce.popValue()) - const signbit = 1 << 31 - ce.pushValue(uint64(v1&^signbit | v2&signbit)) - } else { - // float64 - v2 := ce.popValue() - v1 := ce.popValue() - const signbit = 1 << 63 - ce.pushValue(v1&^signbit | v2&signbit) - } - frame.pc++ - case operationKindI32WrapFromI64: - ce.pushValue(uint64(uint32(ce.popValue()))) - frame.pc++ - case operationKindITruncFromF: - if op.B1 == 0 { - // float32 - switch signedInt(op.B2) { - case signedInt32: - v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue())))) - if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN - if op.B3 { - // non-trapping conversion must cast nan to zero. 
- v = 0 - } else { - panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) - } - } else if v < math.MinInt32 || v > math.MaxInt32 { - if op.B3 { - // non-trapping conversion must "saturate" the value for overflowing sources. - if v < 0 { - v = math.MinInt32 - } else { - v = math.MaxInt32 - } - } else { - panic(wasmruntime.ErrRuntimeIntegerOverflow) - } - } - ce.pushValue(uint64(uint32(int32(v)))) - case signedInt64: - v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue())))) - res := int64(v) - if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN - if op.B3 { - // non-trapping conversion must cast nan to zero. - res = 0 - } else { - panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) - } - } else if v < math.MinInt64 || v >= math.MaxInt64 { - // Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation, - // and that's why we use '>=' not '>' to check overflow. - if op.B3 { - // non-trapping conversion must "saturate" the value for overflowing sources. - if v < 0 { - res = math.MinInt64 - } else { - res = math.MaxInt64 - } - } else { - panic(wasmruntime.ErrRuntimeIntegerOverflow) - } - } - ce.pushValue(uint64(res)) - case signedUint32: - v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue())))) - if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN - if op.B3 { - // non-trapping conversion must cast nan to zero. - v = 0 - } else { - panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) - } - } else if v < 0 || v > math.MaxUint32 { - if op.B3 { - // non-trapping conversion must "saturate" the value for overflowing source. - if v < 0 { - v = 0 - } else { - v = math.MaxUint32 - } - } else { - panic(wasmruntime.ErrRuntimeIntegerOverflow) - } - } - ce.pushValue(uint64(uint32(v))) - case signedUint64: - v := math.Trunc(float64(math.Float32frombits(uint32(ce.popValue())))) - res := uint64(v) - if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN - if op.B3 { - // non-trapping conversion must cast nan to zero. - res = 0 - } else { - panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) - } - } else if v < 0 || v >= math.MaxUint64 { - // Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation, - // and that's why we use '>=' not '>' to check overflow. - if op.B3 { - // non-trapping conversion must "saturate" the value for overflowing source. - if v < 0 { - res = 0 - } else { - res = math.MaxUint64 - } - } else { - panic(wasmruntime.ErrRuntimeIntegerOverflow) - } - } - ce.pushValue(res) - } - } else { - // float64 - switch signedInt(op.B2) { - case signedInt32: - v := math.Trunc(math.Float64frombits(ce.popValue())) - if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN - if op.B3 { - // non-trapping conversion must cast nan to zero. - v = 0 - } else { - panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) - } - } else if v < math.MinInt32 || v > math.MaxInt32 { - if op.B3 { - // non-trapping conversion must "saturate" the value for overflowing source. - if v < 0 { - v = math.MinInt32 - } else { - v = math.MaxInt32 - } - } else { - panic(wasmruntime.ErrRuntimeIntegerOverflow) - } - } - ce.pushValue(uint64(uint32(int32(v)))) - case signedInt64: - v := math.Trunc(math.Float64frombits(ce.popValue())) - res := int64(v) - if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN - if op.B3 { - // non-trapping conversion must cast nan to zero. 
- res = 0 - } else { - panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) - } - } else if v < math.MinInt64 || v >= math.MaxInt64 { - // Note: math.MaxInt64 is rounded up to math.MaxInt64+1 in 64-bit float representation, - // and that's why we use '>=' not '>' to check overflow. - if op.B3 { - // non-trapping conversion must "saturate" the value for overflowing source. - if v < 0 { - res = math.MinInt64 - } else { - res = math.MaxInt64 - } - } else { - panic(wasmruntime.ErrRuntimeIntegerOverflow) - } - } - ce.pushValue(uint64(res)) - case signedUint32: - v := math.Trunc(math.Float64frombits(ce.popValue())) - if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN - if op.B3 { - // non-trapping conversion must cast nan to zero. - v = 0 - } else { - panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) - } - } else if v < 0 || v > math.MaxUint32 { - if op.B3 { - // non-trapping conversion must "saturate" the value for overflowing source. - if v < 0 { - v = 0 - } else { - v = math.MaxUint32 - } - } else { - panic(wasmruntime.ErrRuntimeIntegerOverflow) - } - } - ce.pushValue(uint64(uint32(v))) - case signedUint64: - v := math.Trunc(math.Float64frombits(ce.popValue())) - res := uint64(v) - if math.IsNaN(v) { // NaN cannot be compared with themselves, so we have to use IsNaN - if op.B3 { - // non-trapping conversion must cast nan to zero. - res = 0 - } else { - panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) - } - } else if v < 0 || v >= math.MaxUint64 { - // Note: math.MaxUint64 is rounded up to math.MaxUint64+1 in 64-bit float representation, - // and that's why we use '>=' not '>' to check overflow. - if op.B3 { - // non-trapping conversion must "saturate" the value for overflowing source. - if v < 0 { - res = 0 - } else { - res = math.MaxUint64 - } - } else { - panic(wasmruntime.ErrRuntimeIntegerOverflow) - } - } - ce.pushValue(res) - } - } - frame.pc++ - case operationKindFConvertFromI: - switch signedInt(op.B1) { - case signedInt32: - if op.B2 == 0 { - // float32 - v := float32(int32(ce.popValue())) - ce.pushValue(uint64(math.Float32bits(v))) - } else { - // float64 - v := float64(int32(ce.popValue())) - ce.pushValue(math.Float64bits(v)) - } - case signedInt64: - if op.B2 == 0 { - // float32 - v := float32(int64(ce.popValue())) - ce.pushValue(uint64(math.Float32bits(v))) - } else { - // float64 - v := float64(int64(ce.popValue())) - ce.pushValue(math.Float64bits(v)) - } - case signedUint32: - if op.B2 == 0 { - // float32 - v := float32(uint32(ce.popValue())) - ce.pushValue(uint64(math.Float32bits(v))) - } else { - // float64 - v := float64(uint32(ce.popValue())) - ce.pushValue(math.Float64bits(v)) - } - case signedUint64: - if op.B2 == 0 { - // float32 - v := float32(ce.popValue()) - ce.pushValue(uint64(math.Float32bits(v))) - } else { - // float64 - v := float64(ce.popValue()) - ce.pushValue(math.Float64bits(v)) - } - } - frame.pc++ - case operationKindF32DemoteFromF64: - v := float32(math.Float64frombits(ce.popValue())) - ce.pushValue(uint64(math.Float32bits(v))) - frame.pc++ - case operationKindF64PromoteFromF32: - v := float64(math.Float32frombits(uint32(ce.popValue()))) - ce.pushValue(math.Float64bits(v)) - frame.pc++ - case operationKindExtend: - if op.B1 == 1 { - // Signed. 
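When op.B3 is set, the truncation case above implements the non-trapping (saturating) conversions: NaN maps to zero and out-of-range inputs clamp to the target type's bounds instead of trapping. A minimal standalone sketch of that rule for i32.trunc_sat_f64_s; truncSatF64ToI32 is an illustrative name, not part of wazero.

package main

import (
	"fmt"
	"math"
)

// truncSatF64ToI32 follows i32.trunc_sat_f64_s semantics as in the case above:
// truncate toward zero, map NaN to 0, and saturate out-of-range values to the
// int32 bounds rather than trapping.
func truncSatF64ToI32(f float64) int32 {
	v := math.Trunc(f)
	switch {
	case math.IsNaN(v):
		return 0
	case v < math.MinInt32:
		return math.MinInt32
	case v > math.MaxInt32:
		return math.MaxInt32
	default:
		return int32(v)
	}
}

func main() {
	fmt.Println(truncSatF64ToI32(math.NaN())) // 0
	fmt.Println(truncSatF64ToI32(1e12))       // 2147483647
	fmt.Println(truncSatF64ToI32(-3.9))       // -3
}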
- v := int64(int32(ce.popValue())) - ce.pushValue(uint64(v)) - } else { - v := uint64(uint32(ce.popValue())) - ce.pushValue(v) - } - frame.pc++ - case operationKindSignExtend32From8: - v := uint32(int8(ce.popValue())) - ce.pushValue(uint64(v)) - frame.pc++ - case operationKindSignExtend32From16: - v := uint32(int16(ce.popValue())) - ce.pushValue(uint64(v)) - frame.pc++ - case operationKindSignExtend64From8: - v := int64(int8(ce.popValue())) - ce.pushValue(uint64(v)) - frame.pc++ - case operationKindSignExtend64From16: - v := int64(int16(ce.popValue())) - ce.pushValue(uint64(v)) - frame.pc++ - case operationKindSignExtend64From32: - v := int64(int32(ce.popValue())) - ce.pushValue(uint64(v)) - frame.pc++ - case operationKindMemoryInit: - dataInstance := dataInstances[op.U1] - copySize := ce.popValue() - inDataOffset := ce.popValue() - inMemoryOffset := ce.popValue() - if inDataOffset+copySize > uint64(len(dataInstance)) || - inMemoryOffset+copySize > uint64(len(memoryInst.Buffer)) { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } else if copySize != 0 { - copy(memoryInst.Buffer[inMemoryOffset:inMemoryOffset+copySize], dataInstance[inDataOffset:]) - } - frame.pc++ - case operationKindDataDrop: - dataInstances[op.U1] = nil - frame.pc++ - case operationKindMemoryCopy: - memLen := uint64(len(memoryInst.Buffer)) - copySize := ce.popValue() - sourceOffset := ce.popValue() - destinationOffset := ce.popValue() - if sourceOffset+copySize > memLen || destinationOffset+copySize > memLen { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } else if copySize != 0 { - copy(memoryInst.Buffer[destinationOffset:], - memoryInst.Buffer[sourceOffset:sourceOffset+copySize]) - } - frame.pc++ - case operationKindMemoryFill: - fillSize := ce.popValue() - value := byte(ce.popValue()) - offset := ce.popValue() - if fillSize+offset > uint64(len(memoryInst.Buffer)) { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } else if fillSize != 0 { - // Uses the copy trick for faster filling buffer. 
- // https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d - buf := memoryInst.Buffer[offset : offset+fillSize] - buf[0] = value - for i := 1; i < len(buf); i *= 2 { - copy(buf[i:], buf[:i]) - } - } - frame.pc++ - case operationKindTableInit: - elementInstance := elementInstances[op.U1] - copySize := ce.popValue() - inElementOffset := ce.popValue() - inTableOffset := ce.popValue() - table := tables[op.U2] - if inElementOffset+copySize > uint64(len(elementInstance)) || - inTableOffset+copySize > uint64(len(table.References)) { - panic(wasmruntime.ErrRuntimeInvalidTableAccess) - } else if copySize != 0 { - copy(table.References[inTableOffset:inTableOffset+copySize], elementInstance[inElementOffset:]) - } - frame.pc++ - case operationKindElemDrop: - elementInstances[op.U1] = nil - frame.pc++ - case operationKindTableCopy: - srcTable, dstTable := tables[op.U1].References, tables[op.U2].References - copySize := ce.popValue() - sourceOffset := ce.popValue() - destinationOffset := ce.popValue() - if sourceOffset+copySize > uint64(len(srcTable)) || destinationOffset+copySize > uint64(len(dstTable)) { - panic(wasmruntime.ErrRuntimeInvalidTableAccess) - } else if copySize != 0 { - copy(dstTable[destinationOffset:], srcTable[sourceOffset:sourceOffset+copySize]) - } - frame.pc++ - case operationKindRefFunc: - ce.pushValue(uint64(uintptr(unsafe.Pointer(&functions[op.U1])))) - frame.pc++ - case operationKindTableGet: - table := tables[op.U1] - - offset := ce.popValue() - if offset >= uint64(len(table.References)) { - panic(wasmruntime.ErrRuntimeInvalidTableAccess) - } - - ce.pushValue(uint64(table.References[offset])) - frame.pc++ - case operationKindTableSet: - table := tables[op.U1] - ref := ce.popValue() - - offset := ce.popValue() - if offset >= uint64(len(table.References)) { - panic(wasmruntime.ErrRuntimeInvalidTableAccess) - } - - table.References[offset] = uintptr(ref) // externrefs are opaque uint64. - frame.pc++ - case operationKindTableSize: - table := tables[op.U1] - ce.pushValue(uint64(len(table.References))) - frame.pc++ - case operationKindTableGrow: - table := tables[op.U1] - num, ref := ce.popValue(), ce.popValue() - ret := table.Grow(uint32(num), uintptr(ref)) - ce.pushValue(uint64(ret)) - frame.pc++ - case operationKindTableFill: - table := tables[op.U1] - num := ce.popValue() - ref := uintptr(ce.popValue()) - offset := ce.popValue() - if num+offset > uint64(len(table.References)) { - panic(wasmruntime.ErrRuntimeInvalidTableAccess) - } else if num > 0 { - // Uses the copy trick for faster filling the region with the value. 
- // https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d - targetRegion := table.References[offset : offset+num] - targetRegion[0] = ref - for i := 1; i < len(targetRegion); i *= 2 { - copy(targetRegion[i:], targetRegion[:i]) - } - } - frame.pc++ - case operationKindV128Const: - lo, hi := op.U1, op.U2 - ce.pushValue(lo) - ce.pushValue(hi) - frame.pc++ - case operationKindV128Add: - yHigh, yLow := ce.popValue(), ce.popValue() - xHigh, xLow := ce.popValue(), ce.popValue() - switch op.B1 { - case shapeI8x16: - ce.pushValue( - uint64(uint8(xLow>>8)+uint8(yLow>>8))<<8 | uint64(uint8(xLow)+uint8(yLow)) | - uint64(uint8(xLow>>24)+uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)+uint8(yLow>>16))<<16 | - uint64(uint8(xLow>>40)+uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)+uint8(yLow>>32))<<32 | - uint64(uint8(xLow>>56)+uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)+uint8(yLow>>48))<<48, - ) - ce.pushValue( - uint64(uint8(xHigh>>8)+uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)+uint8(yHigh)) | - uint64(uint8(xHigh>>24)+uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)+uint8(yHigh>>16))<<16 | - uint64(uint8(xHigh>>40)+uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)+uint8(yHigh>>32))<<32 | - uint64(uint8(xHigh>>56)+uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)+uint8(yHigh>>48))<<48, - ) - case shapeI16x8: - ce.pushValue( - uint64(uint16(xLow>>16+yLow>>16))<<16 | uint64(uint16(xLow)+uint16(yLow)) | - uint64(uint16(xLow>>48+yLow>>48))<<48 | uint64(uint16(xLow>>32+yLow>>32))<<32, - ) - ce.pushValue( - uint64(uint16(xHigh>>16)+uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)+uint16(yHigh)) | - uint64(uint16(xHigh>>48)+uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)+uint16(yHigh>>32))<<32, - ) - case shapeI32x4: - ce.pushValue(uint64(uint32(xLow>>32)+uint32(yLow>>32))<<32 | uint64(uint32(xLow)+uint32(yLow))) - ce.pushValue(uint64(uint32(xHigh>>32)+uint32(yHigh>>32))<<32 | uint64(uint32(xHigh)+uint32(yHigh))) - case shapeI64x2: - ce.pushValue(xLow + yLow) - ce.pushValue(xHigh + yHigh) - case shapeF32x4: - ce.pushValue( - addFloat32bits(uint32(xLow), uint32(yLow)) | addFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32, - ) - ce.pushValue( - addFloat32bits(uint32(xHigh), uint32(yHigh)) | addFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32, - ) - case shapeF64x2: - ce.pushValue(math.Float64bits(math.Float64frombits(xLow) + math.Float64frombits(yLow))) - ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) + math.Float64frombits(yHigh))) - } - frame.pc++ - case operationKindV128Sub: - yHigh, yLow := ce.popValue(), ce.popValue() - xHigh, xLow := ce.popValue(), ce.popValue() - switch op.B1 { - case shapeI8x16: - ce.pushValue( - uint64(uint8(xLow>>8)-uint8(yLow>>8))<<8 | uint64(uint8(xLow)-uint8(yLow)) | - uint64(uint8(xLow>>24)-uint8(yLow>>24))<<24 | uint64(uint8(xLow>>16)-uint8(yLow>>16))<<16 | - uint64(uint8(xLow>>40)-uint8(yLow>>40))<<40 | uint64(uint8(xLow>>32)-uint8(yLow>>32))<<32 | - uint64(uint8(xLow>>56)-uint8(yLow>>56))<<56 | uint64(uint8(xLow>>48)-uint8(yLow>>48))<<48, - ) - ce.pushValue( - uint64(uint8(xHigh>>8)-uint8(yHigh>>8))<<8 | uint64(uint8(xHigh)-uint8(yHigh)) | - uint64(uint8(xHigh>>24)-uint8(yHigh>>24))<<24 | uint64(uint8(xHigh>>16)-uint8(yHigh>>16))<<16 | - uint64(uint8(xHigh>>40)-uint8(yHigh>>40))<<40 | uint64(uint8(xHigh>>32)-uint8(yHigh>>32))<<32 | - uint64(uint8(xHigh>>56)-uint8(yHigh>>56))<<56 | uint64(uint8(xHigh>>48)-uint8(yHigh>>48))<<48, - ) - case shapeI16x8: - ce.pushValue( - uint64(uint16(xLow>>16)-uint16(yLow>>16))<<16 | uint64(uint16(xLow)-uint16(yLow)) | - 
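The memory.fill and table.fill cases above both use the doubling-copy idiom their comments point at: write one element, then repeatedly copy the already-filled prefix over the next stretch, so the region is filled in O(log n) calls to copy. A minimal standalone sketch of that idiom; fillBytes is an illustrative name, not a wazero function.

package main

import "fmt"

// fillBytes fills buf with v using the doubling-copy trick referenced above:
// after the first byte is written, each copy doubles the initialized prefix.
func fillBytes(buf []byte, v byte) {
	if len(buf) == 0 {
		return
	}
	buf[0] = v
	for i := 1; i < len(buf); i *= 2 {
		copy(buf[i:], buf[:i])
	}
}

func main() {
	b := make([]byte, 10)
	fillBytes(b, 0xAA)
	fmt.Printf("% x\n", b) // aa aa aa aa aa aa aa aa aa aa
}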
uint64(uint16(xLow>>48)-uint16(yLow>>48))<<48 | uint64(uint16(xLow>>32)-uint16(yLow>>32))<<32, - ) - ce.pushValue( - uint64(uint16(xHigh>>16)-uint16(yHigh>>16))<<16 | uint64(uint16(xHigh)-uint16(yHigh)) | - uint64(uint16(xHigh>>48)-uint16(yHigh>>48))<<48 | uint64(uint16(xHigh>>32)-uint16(yHigh>>32))<<32, - ) - case shapeI32x4: - ce.pushValue(uint64(uint32(xLow>>32-yLow>>32))<<32 | uint64(uint32(xLow)-uint32(yLow))) - ce.pushValue(uint64(uint32(xHigh>>32-yHigh>>32))<<32 | uint64(uint32(xHigh)-uint32(yHigh))) - case shapeI64x2: - ce.pushValue(xLow - yLow) - ce.pushValue(xHigh - yHigh) - case shapeF32x4: - ce.pushValue( - subFloat32bits(uint32(xLow), uint32(yLow)) | subFloat32bits(uint32(xLow>>32), uint32(yLow>>32))<<32, - ) - ce.pushValue( - subFloat32bits(uint32(xHigh), uint32(yHigh)) | subFloat32bits(uint32(xHigh>>32), uint32(yHigh>>32))<<32, - ) - case shapeF64x2: - ce.pushValue(math.Float64bits(math.Float64frombits(xLow) - math.Float64frombits(yLow))) - ce.pushValue(math.Float64bits(math.Float64frombits(xHigh) - math.Float64frombits(yHigh))) - } - frame.pc++ - case operationKindV128Load: - offset := ce.popMemoryOffset(op) - switch op.B1 { - case v128LoadType128: - lo, ok := memoryInst.ReadUint64Le(offset) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue(lo) - hi, ok := memoryInst.ReadUint64Le(offset + 8) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue(hi) - case v128LoadType8x8s: - data, ok := memoryInst.Read(offset, 8) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue( - uint64(uint16(int8(data[3])))<<48 | uint64(uint16(int8(data[2])))<<32 | uint64(uint16(int8(data[1])))<<16 | uint64(uint16(int8(data[0]))), - ) - ce.pushValue( - uint64(uint16(int8(data[7])))<<48 | uint64(uint16(int8(data[6])))<<32 | uint64(uint16(int8(data[5])))<<16 | uint64(uint16(int8(data[4]))), - ) - case v128LoadType8x8u: - data, ok := memoryInst.Read(offset, 8) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue( - uint64(data[3])<<48 | uint64(data[2])<<32 | uint64(data[1])<<16 | uint64(data[0]), - ) - ce.pushValue( - uint64(data[7])<<48 | uint64(data[6])<<32 | uint64(data[5])<<16 | uint64(data[4]), - ) - case v128LoadType16x4s: - data, ok := memoryInst.Read(offset, 8) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue( - uint64(int16(binary.LittleEndian.Uint16(data[2:])))<<32 | - uint64(uint32(int16(binary.LittleEndian.Uint16(data)))), - ) - ce.pushValue( - uint64(uint32(int16(binary.LittleEndian.Uint16(data[6:]))))<<32 | - uint64(uint32(int16(binary.LittleEndian.Uint16(data[4:])))), - ) - case v128LoadType16x4u: - data, ok := memoryInst.Read(offset, 8) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue( - uint64(binary.LittleEndian.Uint16(data[2:]))<<32 | uint64(binary.LittleEndian.Uint16(data)), - ) - ce.pushValue( - uint64(binary.LittleEndian.Uint16(data[6:]))<<32 | uint64(binary.LittleEndian.Uint16(data[4:])), - ) - case v128LoadType32x2s: - data, ok := memoryInst.Read(offset, 8) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data)))) - ce.pushValue(uint64(int32(binary.LittleEndian.Uint32(data[4:])))) - case v128LoadType32x2u: - data, ok := memoryInst.Read(offset, 8) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue(uint64(binary.LittleEndian.Uint32(data))) - 
ce.pushValue(uint64(binary.LittleEndian.Uint32(data[4:]))) - case v128LoadType8Splat: - v, ok := memoryInst.ReadByte(offset) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - v8 := uint64(v)<<56 | uint64(v)<<48 | uint64(v)<<40 | uint64(v)<<32 | - uint64(v)<<24 | uint64(v)<<16 | uint64(v)<<8 | uint64(v) - ce.pushValue(v8) - ce.pushValue(v8) - case v128LoadType16Splat: - v, ok := memoryInst.ReadUint16Le(offset) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - v4 := uint64(v)<<48 | uint64(v)<<32 | uint64(v)<<16 | uint64(v) - ce.pushValue(v4) - ce.pushValue(v4) - case v128LoadType32Splat: - v, ok := memoryInst.ReadUint32Le(offset) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - vv := uint64(v)<<32 | uint64(v) - ce.pushValue(vv) - ce.pushValue(vv) - case v128LoadType64Splat: - lo, ok := memoryInst.ReadUint64Le(offset) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue(lo) - ce.pushValue(lo) - case v128LoadType32zero: - lo, ok := memoryInst.ReadUint32Le(offset) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue(uint64(lo)) - ce.pushValue(0) - case v128LoadType64zero: - lo, ok := memoryInst.ReadUint64Le(offset) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue(lo) - ce.pushValue(0) - } - frame.pc++ - case operationKindV128LoadLane: - hi, lo := ce.popValue(), ce.popValue() - offset := ce.popMemoryOffset(op) - switch op.B1 { - case 8: - b, ok := memoryInst.ReadByte(offset) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - if op.B2 < 8 { - s := op.B2 << 3 - lo = (lo & ^(0xff << s)) | uint64(b)<<s - } else { - s := (op.B2 - 8) << 3 - hi = (hi & ^(0xff << s)) | uint64(b)<<s - } - case 16: - b, ok := memoryInst.ReadUint16Le(offset) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - if op.B2 < 4 { - s := op.B2 << 4 - lo = (lo & ^(0xff_ff << s)) | uint64(b)<<s - } else { - s := (op.B2 - 4) << 4 - hi = (hi & ^(0xff_ff << s)) | uint64(b)<<s - } - case 32: - b, ok := memoryInst.ReadUint32Le(offset) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - if op.B2 < 2 { - s := op.B2 << 5 - lo = (lo & ^(0xff_ff_ff_ff << s)) | uint64(b)<<s - } else { - s := (op.B2 - 2) << 5 - hi = (hi & ^(0xff_ff_ff_ff << s)) | uint64(b)<<s - } - case 64: - b, ok := memoryInst.ReadUint64Le(offset) - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - if op.B2 == 0 { - lo = b - } else { - hi = b - } - } - ce.pushValue(lo) - ce.pushValue(hi) - frame.pc++ - case operationKindV128Store: - hi, lo := ce.popValue(), ce.popValue() - offset := ce.popMemoryOffset(op) - // Write the upper bytes first to trigger an early error if the memory access is out of bounds. - // Otherwise, the lower bytes might be written to memory, but the upper bytes might not. 
- if uint64(offset)+8 > math.MaxUint32 { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - if ok := memoryInst.WriteUint64Le(offset+8, hi); !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - if ok := memoryInst.WriteUint64Le(offset, lo); !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - frame.pc++ - case operationKindV128StoreLane: - hi, lo := ce.popValue(), ce.popValue() - offset := ce.popMemoryOffset(op) - var ok bool - switch op.B1 { - case 8: - if op.B2 < 8 { - ok = memoryInst.WriteByte(offset, byte(lo>>(op.B2*8))) - } else { - ok = memoryInst.WriteByte(offset, byte(hi>>((op.B2-8)*8))) - } - case 16: - if op.B2 < 4 { - ok = memoryInst.WriteUint16Le(offset, uint16(lo>>(op.B2*16))) - } else { - ok = memoryInst.WriteUint16Le(offset, uint16(hi>>((op.B2-4)*16))) - } - case 32: - if op.B2 < 2 { - ok = memoryInst.WriteUint32Le(offset, uint32(lo>>(op.B2*32))) - } else { - ok = memoryInst.WriteUint32Le(offset, uint32(hi>>((op.B2-2)*32))) - } - case 64: - if op.B2 == 0 { - ok = memoryInst.WriteUint64Le(offset, lo) - } else { - ok = memoryInst.WriteUint64Le(offset, hi) - } - } - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - frame.pc++ - case operationKindV128ReplaceLane: - v := ce.popValue() - hi, lo := ce.popValue(), ce.popValue() - switch op.B1 { - case shapeI8x16: - if op.B2 < 8 { - s := op.B2 << 3 - lo = (lo & ^(0xff << s)) | uint64(byte(v))<<s - } else { - s := (op.B2 - 8) << 3 - hi = (hi & ^(0xff << s)) | uint64(byte(v))<<s - } - case shapeI16x8: - if op.B2 < 4 { - s := op.B2 << 4 - lo = (lo & ^(0xff_ff << s)) | uint64(uint16(v))<<s - } else { - s := (op.B2 - 4) << 4 - hi = (hi & ^(0xff_ff << s)) | uint64(uint16(v))<<s - } - case shapeI32x4, shapeF32x4: - if op.B2 < 2 { - s := op.B2 << 5 - lo = (lo & ^(0xff_ff_ff_ff << s)) | uint64(uint32(v))<<s - } else { - s := (op.B2 - 2) << 5 - hi = (hi & ^(0xff_ff_ff_ff << s)) | uint64(uint32(v))<<s - } - case shapeI64x2, shapeF64x2: - if op.B2 == 0 { - lo = v - } else { - hi = v - } - } - ce.pushValue(lo) - ce.pushValue(hi) - frame.pc++ - case operationKindV128ExtractLane: - hi, lo := ce.popValue(), ce.popValue() - var v uint64 - switch op.B1 { - case shapeI8x16: - var u8 byte - if op.B2 < 8 { - u8 = byte(lo >> (op.B2 * 8)) - } else { - u8 = byte(hi >> ((op.B2 - 8) * 8)) - } - if op.B3 { - // sign-extend. - v = uint64(uint32(int8(u8))) - } else { - v = uint64(u8) - } - case shapeI16x8: - var u16 uint16 - if op.B2 < 4 { - u16 = uint16(lo >> (op.B2 * 16)) - } else { - u16 = uint16(hi >> ((op.B2 - 4) * 16)) - } - if op.B3 { - // sign-extend. 
- v = uint64(uint32(int16(u16))) - } else { - v = uint64(u16) - } - case shapeI32x4, shapeF32x4: - if op.B2 < 2 { - v = uint64(uint32(lo >> (op.B2 * 32))) - } else { - v = uint64(uint32(hi >> ((op.B2 - 2) * 32))) - } - case shapeI64x2, shapeF64x2: - if op.B2 == 0 { - v = lo - } else { - v = hi - } - } - ce.pushValue(v) - frame.pc++ - case operationKindV128Splat: - v := ce.popValue() - var hi, lo uint64 - switch op.B1 { - case shapeI8x16: - v8 := uint64(byte(v))<<56 | uint64(byte(v))<<48 | uint64(byte(v))<<40 | uint64(byte(v))<<32 | - uint64(byte(v))<<24 | uint64(byte(v))<<16 | uint64(byte(v))<<8 | uint64(byte(v)) - hi, lo = v8, v8 - case shapeI16x8: - v4 := uint64(uint16(v))<<48 | uint64(uint16(v))<<32 | uint64(uint16(v))<<16 | uint64(uint16(v)) - hi, lo = v4, v4 - case shapeI32x4, shapeF32x4: - v2 := uint64(uint32(v))<<32 | uint64(uint32(v)) - lo, hi = v2, v2 - case shapeI64x2, shapeF64x2: - lo, hi = v, v - } - ce.pushValue(lo) - ce.pushValue(hi) - frame.pc++ - case operationKindV128Swizzle: - idxHi, idxLo := ce.popValue(), ce.popValue() - baseHi, baseLo := ce.popValue(), ce.popValue() - var newVal [16]byte - for i := 0; i < 16; i++ { - var id byte - if i < 8 { - id = byte(idxLo >> (i * 8)) - } else { - id = byte(idxHi >> ((i - 8) * 8)) - } - if id < 8 { - newVal[i] = byte(baseLo >> (id * 8)) - } else if id < 16 { - newVal[i] = byte(baseHi >> ((id - 8) * 8)) - } - } - ce.pushValue(binary.LittleEndian.Uint64(newVal[:8])) - ce.pushValue(binary.LittleEndian.Uint64(newVal[8:])) - frame.pc++ - case operationKindV128Shuffle: - xHi, xLo, yHi, yLo := ce.popValue(), ce.popValue(), ce.popValue(), ce.popValue() - var newVal [16]byte - for i, l := range op.Us { - if l < 8 { - newVal[i] = byte(yLo >> (l * 8)) - } else if l < 16 { - newVal[i] = byte(yHi >> ((l - 8) * 8)) - } else if l < 24 { - newVal[i] = byte(xLo >> ((l - 16) * 8)) - } else if l < 32 { - newVal[i] = byte(xHi >> ((l - 24) * 8)) - } - } - ce.pushValue(binary.LittleEndian.Uint64(newVal[:8])) - ce.pushValue(binary.LittleEndian.Uint64(newVal[8:])) - frame.pc++ - case operationKindV128AnyTrue: - hi, lo := ce.popValue(), ce.popValue() - if hi != 0 || lo != 0 { - ce.pushValue(1) - } else { - ce.pushValue(0) - } - frame.pc++ - case operationKindV128AllTrue: - hi, lo := ce.popValue(), ce.popValue() - var ret bool - switch op.B1 { - case shapeI8x16: - ret = (uint8(lo) != 0) && (uint8(lo>>8) != 0) && (uint8(lo>>16) != 0) && (uint8(lo>>24) != 0) && - (uint8(lo>>32) != 0) && (uint8(lo>>40) != 0) && (uint8(lo>>48) != 0) && (uint8(lo>>56) != 0) && - (uint8(hi) != 0) && (uint8(hi>>8) != 0) && (uint8(hi>>16) != 0) && (uint8(hi>>24) != 0) && - (uint8(hi>>32) != 0) && (uint8(hi>>40) != 0) && (uint8(hi>>48) != 0) && (uint8(hi>>56) != 0) - case shapeI16x8: - ret = (uint16(lo) != 0) && (uint16(lo>>16) != 0) && (uint16(lo>>32) != 0) && (uint16(lo>>48) != 0) && - (uint16(hi) != 0) && (uint16(hi>>16) != 0) && (uint16(hi>>32) != 0) && (uint16(hi>>48) != 0) - case shapeI32x4: - ret = (uint32(lo) != 0) && (uint32(lo>>32) != 0) && - (uint32(hi) != 0) && (uint32(hi>>32) != 0) - case shapeI64x2: - ret = (lo != 0) && - (hi != 0) - } - if ret { - ce.pushValue(1) - } else { - ce.pushValue(0) - } - frame.pc++ - case operationKindV128BitMask: - // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitmask-extraction - hi, lo := ce.popValue(), ce.popValue() - var res uint64 - switch op.B1 { - case shapeI8x16: - for i := 0; i < 8; i++ { - if int8(lo>>(i*8)) < 0 { - res |= 1 << i - } - } - for i := 0; i < 8; i++ { - if int8(hi>>(i*8)) < 0 { - res 
|= 1 << (i + 8) - } - } - case shapeI16x8: - for i := 0; i < 4; i++ { - if int16(lo>>(i*16)) < 0 { - res |= 1 << i - } - } - for i := 0; i < 4; i++ { - if int16(hi>>(i*16)) < 0 { - res |= 1 << (i + 4) - } - } - case shapeI32x4: - for i := 0; i < 2; i++ { - if int32(lo>>(i*32)) < 0 { - res |= 1 << i - } - } - for i := 0; i < 2; i++ { - if int32(hi>>(i*32)) < 0 { - res |= 1 << (i + 2) - } - } - case shapeI64x2: - if int64(lo) < 0 { - res |= 0b01 - } - if int(hi) < 0 { - res |= 0b10 - } - } - ce.pushValue(res) - frame.pc++ - case operationKindV128And: - x2Hi, x2Lo := ce.popValue(), ce.popValue() - x1Hi, x1Lo := ce.popValue(), ce.popValue() - ce.pushValue(x1Lo & x2Lo) - ce.pushValue(x1Hi & x2Hi) - frame.pc++ - case operationKindV128Not: - hi, lo := ce.popValue(), ce.popValue() - ce.pushValue(^lo) - ce.pushValue(^hi) - frame.pc++ - case operationKindV128Or: - x2Hi, x2Lo := ce.popValue(), ce.popValue() - x1Hi, x1Lo := ce.popValue(), ce.popValue() - ce.pushValue(x1Lo | x2Lo) - ce.pushValue(x1Hi | x2Hi) - frame.pc++ - case operationKindV128Xor: - x2Hi, x2Lo := ce.popValue(), ce.popValue() - x1Hi, x1Lo := ce.popValue(), ce.popValue() - ce.pushValue(x1Lo ^ x2Lo) - ce.pushValue(x1Hi ^ x2Hi) - frame.pc++ - case operationKindV128Bitselect: - // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#bitwise-select - cHi, cLo := ce.popValue(), ce.popValue() - x2Hi, x2Lo := ce.popValue(), ce.popValue() - x1Hi, x1Lo := ce.popValue(), ce.popValue() - // v128.or(v128.and(v1, c), v128.and(v2, v128.not(c))) - ce.pushValue((x1Lo & cLo) | (x2Lo & (^cLo))) - ce.pushValue((x1Hi & cHi) | (x2Hi & (^cHi))) - frame.pc++ - case operationKindV128AndNot: - x2Hi, x2Lo := ce.popValue(), ce.popValue() - x1Hi, x1Lo := ce.popValue(), ce.popValue() - ce.pushValue(x1Lo & (^x2Lo)) - ce.pushValue(x1Hi & (^x2Hi)) - frame.pc++ - case operationKindV128Shl: - s := ce.popValue() - hi, lo := ce.popValue(), ce.popValue() - switch op.B1 { - case shapeI8x16: - s = s % 8 - lo = uint64(uint8(lo<<s)) | - uint64(uint8((lo>>8)<<s))<<8 | - uint64(uint8((lo>>16)<<s))<<16 | - uint64(uint8((lo>>24)<<s))<<24 | - uint64(uint8((lo>>32)<<s))<<32 | - uint64(uint8((lo>>40)<<s))<<40 | - uint64(uint8((lo>>48)<<s))<<48 | - uint64(uint8((lo>>56)<<s))<<56 - hi = uint64(uint8(hi<<s)) | - uint64(uint8((hi>>8)<<s))<<8 | - uint64(uint8((hi>>16)<<s))<<16 | - uint64(uint8((hi>>24)<<s))<<24 | - uint64(uint8((hi>>32)<<s))<<32 | - uint64(uint8((hi>>40)<<s))<<40 | - uint64(uint8((hi>>48)<<s))<<48 | - uint64(uint8((hi>>56)<<s))<<56 - case shapeI16x8: - s = s % 16 - lo = uint64(uint16(lo<<s)) | - uint64(uint16((lo>>16)<<s))<<16 | - uint64(uint16((lo>>32)<<s))<<32 | - uint64(uint16((lo>>48)<<s))<<48 - hi = uint64(uint16(hi<<s)) | - uint64(uint16((hi>>16)<<s))<<16 | - uint64(uint16((hi>>32)<<s))<<32 | - uint64(uint16((hi>>48)<<s))<<48 - case shapeI32x4: - s = s % 32 - lo = uint64(uint32(lo<<s)) | uint64(uint32((lo>>32)<<s))<<32 - hi = uint64(uint32(hi<<s)) | uint64(uint32((hi>>32)<<s))<<32 - case shapeI64x2: - s = s % 64 - lo = lo << s - hi = hi << s - } - ce.pushValue(lo) - ce.pushValue(hi) - frame.pc++ - case operationKindV128Shr: - s := ce.popValue() - hi, lo := ce.popValue(), ce.popValue() - switch op.B1 { - case shapeI8x16: - s = s % 8 - if op.B3 { // signed - lo = uint64(uint8(int8(lo)>>s)) | - uint64(uint8(int8(lo>>8)>>s))<<8 | - uint64(uint8(int8(lo>>16)>>s))<<16 | - uint64(uint8(int8(lo>>24)>>s))<<24 | - uint64(uint8(int8(lo>>32)>>s))<<32 | - uint64(uint8(int8(lo>>40)>>s))<<40 | - uint64(uint8(int8(lo>>48)>>s))<<48 | - 
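The V128Bitselect case above evaluates the spec formula v128.or(v128.and(v1, c), v128.and(v2, v128.not(c))) once per 64-bit half of the vector. A minimal standalone sketch of the same bit-level selection on a single uint64; bitselect is an illustrative name, not a wazero API.

package main

import "fmt"

// bitselect picks, bit by bit, from x1 where the mask bit is 1 and from x2
// where it is 0 — the same formula the interpreter applies per 64-bit half.
// Go's &^ (AND NOT) operator stands in for v128.and(x2, v128.not(mask)).
func bitselect(x1, x2, mask uint64) uint64 {
	return (x1 & mask) | (x2 &^ mask)
}

func main() {
	x1 := uint64(0xFFFF_0000_FFFF_0000)
	x2 := uint64(0x1234_5678_9ABC_DEF0)
	m := uint64(0x00FF_00FF_00FF_00FF)
	fmt.Printf("%016x\n", bitselect(x1, x2, m)) // 12ff56009affde00
}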
uint64(uint8(int8(lo>>56)>>s))<<56 - hi = uint64(uint8(int8(hi)>>s)) | - uint64(uint8(int8(hi>>8)>>s))<<8 | - uint64(uint8(int8(hi>>16)>>s))<<16 | - uint64(uint8(int8(hi>>24)>>s))<<24 | - uint64(uint8(int8(hi>>32)>>s))<<32 | - uint64(uint8(int8(hi>>40)>>s))<<40 | - uint64(uint8(int8(hi>>48)>>s))<<48 | - uint64(uint8(int8(hi>>56)>>s))<<56 - } else { - lo = uint64(uint8(lo)>>s) | - uint64(uint8(lo>>8)>>s)<<8 | - uint64(uint8(lo>>16)>>s)<<16 | - uint64(uint8(lo>>24)>>s)<<24 | - uint64(uint8(lo>>32)>>s)<<32 | - uint64(uint8(lo>>40)>>s)<<40 | - uint64(uint8(lo>>48)>>s)<<48 | - uint64(uint8(lo>>56)>>s)<<56 - hi = uint64(uint8(hi)>>s) | - uint64(uint8(hi>>8)>>s)<<8 | - uint64(uint8(hi>>16)>>s)<<16 | - uint64(uint8(hi>>24)>>s)<<24 | - uint64(uint8(hi>>32)>>s)<<32 | - uint64(uint8(hi>>40)>>s)<<40 | - uint64(uint8(hi>>48)>>s)<<48 | - uint64(uint8(hi>>56)>>s)<<56 - } - case shapeI16x8: - s = s % 16 - if op.B3 { // signed - lo = uint64(uint16(int16(lo)>>s)) | - uint64(uint16(int16(lo>>16)>>s))<<16 | - uint64(uint16(int16(lo>>32)>>s))<<32 | - uint64(uint16(int16(lo>>48)>>s))<<48 - hi = uint64(uint16(int16(hi)>>s)) | - uint64(uint16(int16(hi>>16)>>s))<<16 | - uint64(uint16(int16(hi>>32)>>s))<<32 | - uint64(uint16(int16(hi>>48)>>s))<<48 - } else { - lo = uint64(uint16(lo)>>s) | - uint64(uint16(lo>>16)>>s)<<16 | - uint64(uint16(lo>>32)>>s)<<32 | - uint64(uint16(lo>>48)>>s)<<48 - hi = uint64(uint16(hi)>>s) | - uint64(uint16(hi>>16)>>s)<<16 | - uint64(uint16(hi>>32)>>s)<<32 | - uint64(uint16(hi>>48)>>s)<<48 - } - case shapeI32x4: - s = s % 32 - if op.B3 { - lo = uint64(uint32(int32(lo)>>s)) | uint64(uint32(int32(lo>>32)>>s))<<32 - hi = uint64(uint32(int32(hi)>>s)) | uint64(uint32(int32(hi>>32)>>s))<<32 - } else { - lo = uint64(uint32(lo)>>s) | uint64(uint32(lo>>32)>>s)<<32 - hi = uint64(uint32(hi)>>s) | uint64(uint32(hi>>32)>>s)<<32 - } - case shapeI64x2: - s = s % 64 - if op.B3 { // signed - lo = uint64(int64(lo) >> s) - hi = uint64(int64(hi) >> s) - } else { - lo = lo >> s - hi = hi >> s - } - - } - ce.pushValue(lo) - ce.pushValue(hi) - frame.pc++ - case operationKindV128Cmp: - x2Hi, x2Lo := ce.popValue(), ce.popValue() - x1Hi, x1Lo := ce.popValue(), ce.popValue() - var result []bool - switch op.B1 { - case v128CmpTypeI8x16Eq: - result = []bool{ - byte(x1Lo>>0) == byte(x2Lo>>0), byte(x1Lo>>8) == byte(x2Lo>>8), - byte(x1Lo>>16) == byte(x2Lo>>16), byte(x1Lo>>24) == byte(x2Lo>>24), - byte(x1Lo>>32) == byte(x2Lo>>32), byte(x1Lo>>40) == byte(x2Lo>>40), - byte(x1Lo>>48) == byte(x2Lo>>48), byte(x1Lo>>56) == byte(x2Lo>>56), - byte(x1Hi>>0) == byte(x2Hi>>0), byte(x1Hi>>8) == byte(x2Hi>>8), - byte(x1Hi>>16) == byte(x2Hi>>16), byte(x1Hi>>24) == byte(x2Hi>>24), - byte(x1Hi>>32) == byte(x2Hi>>32), byte(x1Hi>>40) == byte(x2Hi>>40), - byte(x1Hi>>48) == byte(x2Hi>>48), byte(x1Hi>>56) == byte(x2Hi>>56), - } - case v128CmpTypeI8x16Ne: - result = []bool{ - byte(x1Lo>>0) != byte(x2Lo>>0), byte(x1Lo>>8) != byte(x2Lo>>8), - byte(x1Lo>>16) != byte(x2Lo>>16), byte(x1Lo>>24) != byte(x2Lo>>24), - byte(x1Lo>>32) != byte(x2Lo>>32), byte(x1Lo>>40) != byte(x2Lo>>40), - byte(x1Lo>>48) != byte(x2Lo>>48), byte(x1Lo>>56) != byte(x2Lo>>56), - byte(x1Hi>>0) != byte(x2Hi>>0), byte(x1Hi>>8) != byte(x2Hi>>8), - byte(x1Hi>>16) != byte(x2Hi>>16), byte(x1Hi>>24) != byte(x2Hi>>24), - byte(x1Hi>>32) != byte(x2Hi>>32), byte(x1Hi>>40) != byte(x2Hi>>40), - byte(x1Hi>>48) != byte(x2Hi>>48), byte(x1Hi>>56) != byte(x2Hi>>56), - } - case v128CmpTypeI8x16LtS: - result = []bool{ - int8(x1Lo>>0) < int8(x2Lo>>0), int8(x1Lo>>8) < int8(x2Lo>>8), - 
int8(x1Lo>>16) < int8(x2Lo>>16), int8(x1Lo>>24) < int8(x2Lo>>24), - int8(x1Lo>>32) < int8(x2Lo>>32), int8(x1Lo>>40) < int8(x2Lo>>40), - int8(x1Lo>>48) < int8(x2Lo>>48), int8(x1Lo>>56) < int8(x2Lo>>56), - int8(x1Hi>>0) < int8(x2Hi>>0), int8(x1Hi>>8) < int8(x2Hi>>8), - int8(x1Hi>>16) < int8(x2Hi>>16), int8(x1Hi>>24) < int8(x2Hi>>24), - int8(x1Hi>>32) < int8(x2Hi>>32), int8(x1Hi>>40) < int8(x2Hi>>40), - int8(x1Hi>>48) < int8(x2Hi>>48), int8(x1Hi>>56) < int8(x2Hi>>56), - } - case v128CmpTypeI8x16LtU: - result = []bool{ - byte(x1Lo>>0) < byte(x2Lo>>0), byte(x1Lo>>8) < byte(x2Lo>>8), - byte(x1Lo>>16) < byte(x2Lo>>16), byte(x1Lo>>24) < byte(x2Lo>>24), - byte(x1Lo>>32) < byte(x2Lo>>32), byte(x1Lo>>40) < byte(x2Lo>>40), - byte(x1Lo>>48) < byte(x2Lo>>48), byte(x1Lo>>56) < byte(x2Lo>>56), - byte(x1Hi>>0) < byte(x2Hi>>0), byte(x1Hi>>8) < byte(x2Hi>>8), - byte(x1Hi>>16) < byte(x2Hi>>16), byte(x1Hi>>24) < byte(x2Hi>>24), - byte(x1Hi>>32) < byte(x2Hi>>32), byte(x1Hi>>40) < byte(x2Hi>>40), - byte(x1Hi>>48) < byte(x2Hi>>48), byte(x1Hi>>56) < byte(x2Hi>>56), - } - case v128CmpTypeI8x16GtS: - result = []bool{ - int8(x1Lo>>0) > int8(x2Lo>>0), int8(x1Lo>>8) > int8(x2Lo>>8), - int8(x1Lo>>16) > int8(x2Lo>>16), int8(x1Lo>>24) > int8(x2Lo>>24), - int8(x1Lo>>32) > int8(x2Lo>>32), int8(x1Lo>>40) > int8(x2Lo>>40), - int8(x1Lo>>48) > int8(x2Lo>>48), int8(x1Lo>>56) > int8(x2Lo>>56), - int8(x1Hi>>0) > int8(x2Hi>>0), int8(x1Hi>>8) > int8(x2Hi>>8), - int8(x1Hi>>16) > int8(x2Hi>>16), int8(x1Hi>>24) > int8(x2Hi>>24), - int8(x1Hi>>32) > int8(x2Hi>>32), int8(x1Hi>>40) > int8(x2Hi>>40), - int8(x1Hi>>48) > int8(x2Hi>>48), int8(x1Hi>>56) > int8(x2Hi>>56), - } - case v128CmpTypeI8x16GtU: - result = []bool{ - byte(x1Lo>>0) > byte(x2Lo>>0), byte(x1Lo>>8) > byte(x2Lo>>8), - byte(x1Lo>>16) > byte(x2Lo>>16), byte(x1Lo>>24) > byte(x2Lo>>24), - byte(x1Lo>>32) > byte(x2Lo>>32), byte(x1Lo>>40) > byte(x2Lo>>40), - byte(x1Lo>>48) > byte(x2Lo>>48), byte(x1Lo>>56) > byte(x2Lo>>56), - byte(x1Hi>>0) > byte(x2Hi>>0), byte(x1Hi>>8) > byte(x2Hi>>8), - byte(x1Hi>>16) > byte(x2Hi>>16), byte(x1Hi>>24) > byte(x2Hi>>24), - byte(x1Hi>>32) > byte(x2Hi>>32), byte(x1Hi>>40) > byte(x2Hi>>40), - byte(x1Hi>>48) > byte(x2Hi>>48), byte(x1Hi>>56) > byte(x2Hi>>56), - } - case v128CmpTypeI8x16LeS: - result = []bool{ - int8(x1Lo>>0) <= int8(x2Lo>>0), int8(x1Lo>>8) <= int8(x2Lo>>8), - int8(x1Lo>>16) <= int8(x2Lo>>16), int8(x1Lo>>24) <= int8(x2Lo>>24), - int8(x1Lo>>32) <= int8(x2Lo>>32), int8(x1Lo>>40) <= int8(x2Lo>>40), - int8(x1Lo>>48) <= int8(x2Lo>>48), int8(x1Lo>>56) <= int8(x2Lo>>56), - int8(x1Hi>>0) <= int8(x2Hi>>0), int8(x1Hi>>8) <= int8(x2Hi>>8), - int8(x1Hi>>16) <= int8(x2Hi>>16), int8(x1Hi>>24) <= int8(x2Hi>>24), - int8(x1Hi>>32) <= int8(x2Hi>>32), int8(x1Hi>>40) <= int8(x2Hi>>40), - int8(x1Hi>>48) <= int8(x2Hi>>48), int8(x1Hi>>56) <= int8(x2Hi>>56), - } - case v128CmpTypeI8x16LeU: - result = []bool{ - byte(x1Lo>>0) <= byte(x2Lo>>0), byte(x1Lo>>8) <= byte(x2Lo>>8), - byte(x1Lo>>16) <= byte(x2Lo>>16), byte(x1Lo>>24) <= byte(x2Lo>>24), - byte(x1Lo>>32) <= byte(x2Lo>>32), byte(x1Lo>>40) <= byte(x2Lo>>40), - byte(x1Lo>>48) <= byte(x2Lo>>48), byte(x1Lo>>56) <= byte(x2Lo>>56), - byte(x1Hi>>0) <= byte(x2Hi>>0), byte(x1Hi>>8) <= byte(x2Hi>>8), - byte(x1Hi>>16) <= byte(x2Hi>>16), byte(x1Hi>>24) <= byte(x2Hi>>24), - byte(x1Hi>>32) <= byte(x2Hi>>32), byte(x1Hi>>40) <= byte(x2Hi>>40), - byte(x1Hi>>48) <= byte(x2Hi>>48), byte(x1Hi>>56) <= byte(x2Hi>>56), - } - case v128CmpTypeI8x16GeS: - result = []bool{ - int8(x1Lo>>0) >= int8(x2Lo>>0), int8(x1Lo>>8) >= int8(x2Lo>>8), 
- int8(x1Lo>>16) >= int8(x2Lo>>16), int8(x1Lo>>24) >= int8(x2Lo>>24), - int8(x1Lo>>32) >= int8(x2Lo>>32), int8(x1Lo>>40) >= int8(x2Lo>>40), - int8(x1Lo>>48) >= int8(x2Lo>>48), int8(x1Lo>>56) >= int8(x2Lo>>56), - int8(x1Hi>>0) >= int8(x2Hi>>0), int8(x1Hi>>8) >= int8(x2Hi>>8), - int8(x1Hi>>16) >= int8(x2Hi>>16), int8(x1Hi>>24) >= int8(x2Hi>>24), - int8(x1Hi>>32) >= int8(x2Hi>>32), int8(x1Hi>>40) >= int8(x2Hi>>40), - int8(x1Hi>>48) >= int8(x2Hi>>48), int8(x1Hi>>56) >= int8(x2Hi>>56), - } - case v128CmpTypeI8x16GeU: - result = []bool{ - byte(x1Lo>>0) >= byte(x2Lo>>0), byte(x1Lo>>8) >= byte(x2Lo>>8), - byte(x1Lo>>16) >= byte(x2Lo>>16), byte(x1Lo>>24) >= byte(x2Lo>>24), - byte(x1Lo>>32) >= byte(x2Lo>>32), byte(x1Lo>>40) >= byte(x2Lo>>40), - byte(x1Lo>>48) >= byte(x2Lo>>48), byte(x1Lo>>56) >= byte(x2Lo>>56), - byte(x1Hi>>0) >= byte(x2Hi>>0), byte(x1Hi>>8) >= byte(x2Hi>>8), - byte(x1Hi>>16) >= byte(x2Hi>>16), byte(x1Hi>>24) >= byte(x2Hi>>24), - byte(x1Hi>>32) >= byte(x2Hi>>32), byte(x1Hi>>40) >= byte(x2Hi>>40), - byte(x1Hi>>48) >= byte(x2Hi>>48), byte(x1Hi>>56) >= byte(x2Hi>>56), - } - case v128CmpTypeI16x8Eq: - result = []bool{ - uint16(x1Lo>>0) == uint16(x2Lo>>0), uint16(x1Lo>>16) == uint16(x2Lo>>16), - uint16(x1Lo>>32) == uint16(x2Lo>>32), uint16(x1Lo>>48) == uint16(x2Lo>>48), - uint16(x1Hi>>0) == uint16(x2Hi>>0), uint16(x1Hi>>16) == uint16(x2Hi>>16), - uint16(x1Hi>>32) == uint16(x2Hi>>32), uint16(x1Hi>>48) == uint16(x2Hi>>48), - } - case v128CmpTypeI16x8Ne: - result = []bool{ - uint16(x1Lo>>0) != uint16(x2Lo>>0), uint16(x1Lo>>16) != uint16(x2Lo>>16), - uint16(x1Lo>>32) != uint16(x2Lo>>32), uint16(x1Lo>>48) != uint16(x2Lo>>48), - uint16(x1Hi>>0) != uint16(x2Hi>>0), uint16(x1Hi>>16) != uint16(x2Hi>>16), - uint16(x1Hi>>32) != uint16(x2Hi>>32), uint16(x1Hi>>48) != uint16(x2Hi>>48), - } - case v128CmpTypeI16x8LtS: - result = []bool{ - int16(x1Lo>>0) < int16(x2Lo>>0), int16(x1Lo>>16) < int16(x2Lo>>16), - int16(x1Lo>>32) < int16(x2Lo>>32), int16(x1Lo>>48) < int16(x2Lo>>48), - int16(x1Hi>>0) < int16(x2Hi>>0), int16(x1Hi>>16) < int16(x2Hi>>16), - int16(x1Hi>>32) < int16(x2Hi>>32), int16(x1Hi>>48) < int16(x2Hi>>48), - } - case v128CmpTypeI16x8LtU: - result = []bool{ - uint16(x1Lo>>0) < uint16(x2Lo>>0), uint16(x1Lo>>16) < uint16(x2Lo>>16), - uint16(x1Lo>>32) < uint16(x2Lo>>32), uint16(x1Lo>>48) < uint16(x2Lo>>48), - uint16(x1Hi>>0) < uint16(x2Hi>>0), uint16(x1Hi>>16) < uint16(x2Hi>>16), - uint16(x1Hi>>32) < uint16(x2Hi>>32), uint16(x1Hi>>48) < uint16(x2Hi>>48), - } - case v128CmpTypeI16x8GtS: - result = []bool{ - int16(x1Lo>>0) > int16(x2Lo>>0), int16(x1Lo>>16) > int16(x2Lo>>16), - int16(x1Lo>>32) > int16(x2Lo>>32), int16(x1Lo>>48) > int16(x2Lo>>48), - int16(x1Hi>>0) > int16(x2Hi>>0), int16(x1Hi>>16) > int16(x2Hi>>16), - int16(x1Hi>>32) > int16(x2Hi>>32), int16(x1Hi>>48) > int16(x2Hi>>48), - } - case v128CmpTypeI16x8GtU: - result = []bool{ - uint16(x1Lo>>0) > uint16(x2Lo>>0), uint16(x1Lo>>16) > uint16(x2Lo>>16), - uint16(x1Lo>>32) > uint16(x2Lo>>32), uint16(x1Lo>>48) > uint16(x2Lo>>48), - uint16(x1Hi>>0) > uint16(x2Hi>>0), uint16(x1Hi>>16) > uint16(x2Hi>>16), - uint16(x1Hi>>32) > uint16(x2Hi>>32), uint16(x1Hi>>48) > uint16(x2Hi>>48), - } - case v128CmpTypeI16x8LeS: - result = []bool{ - int16(x1Lo>>0) <= int16(x2Lo>>0), int16(x1Lo>>16) <= int16(x2Lo>>16), - int16(x1Lo>>32) <= int16(x2Lo>>32), int16(x1Lo>>48) <= int16(x2Lo>>48), - int16(x1Hi>>0) <= int16(x2Hi>>0), int16(x1Hi>>16) <= int16(x2Hi>>16), - int16(x1Hi>>32) <= int16(x2Hi>>32), int16(x1Hi>>48) <= int16(x2Hi>>48), - } - case v128CmpTypeI16x8LeU: - 
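The I16x8 comparison cases above (and the LeU case that continues just below) all follow one pattern: each 16-bit lane is carved out of one of the two 64-bit halves with a shift and a truncating conversion, then compared against the matching lane of the other operand. A minimal standalone sketch of that per-lane extraction, assuming a helper name of my own choosing (cmpI16x8LtS is not part of this file):

package main

import "fmt"

// cmpI16x8LtS compares the four signed 16-bit lanes packed into one
// 64-bit half of each vector, mirroring the shift-and-convert pattern above.
func cmpI16x8LtS(x1, x2 uint64) [4]bool {
	var out [4]bool
	for i := 0; i < 4; i++ {
		a := int16(uint16(x1 >> (i * 16))) // lane i of x1
		b := int16(uint16(x2 >> (i * 16))) // lane i of x2
		out[i] = a < b
	}
	return out
}

func main() {
	// 0xFFFF is -1 as a signed 16-bit lane, so it compares below 0x0001.
	fmt.Println(cmpI16x8LtS(0x0000_0000_0000_FFFF, 0x0000_0000_0000_0001)) // [true false false false]
}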
result = []bool{ - uint16(x1Lo>>0) <= uint16(x2Lo>>0), uint16(x1Lo>>16) <= uint16(x2Lo>>16), - uint16(x1Lo>>32) <= uint16(x2Lo>>32), uint16(x1Lo>>48) <= uint16(x2Lo>>48), - uint16(x1Hi>>0) <= uint16(x2Hi>>0), uint16(x1Hi>>16) <= uint16(x2Hi>>16), - uint16(x1Hi>>32) <= uint16(x2Hi>>32), uint16(x1Hi>>48) <= uint16(x2Hi>>48), - } - case v128CmpTypeI16x8GeS: - result = []bool{ - int16(x1Lo>>0) >= int16(x2Lo>>0), int16(x1Lo>>16) >= int16(x2Lo>>16), - int16(x1Lo>>32) >= int16(x2Lo>>32), int16(x1Lo>>48) >= int16(x2Lo>>48), - int16(x1Hi>>0) >= int16(x2Hi>>0), int16(x1Hi>>16) >= int16(x2Hi>>16), - int16(x1Hi>>32) >= int16(x2Hi>>32), int16(x1Hi>>48) >= int16(x2Hi>>48), - } - case v128CmpTypeI16x8GeU: - result = []bool{ - uint16(x1Lo>>0) >= uint16(x2Lo>>0), uint16(x1Lo>>16) >= uint16(x2Lo>>16), - uint16(x1Lo>>32) >= uint16(x2Lo>>32), uint16(x1Lo>>48) >= uint16(x2Lo>>48), - uint16(x1Hi>>0) >= uint16(x2Hi>>0), uint16(x1Hi>>16) >= uint16(x2Hi>>16), - uint16(x1Hi>>32) >= uint16(x2Hi>>32), uint16(x1Hi>>48) >= uint16(x2Hi>>48), - } - case v128CmpTypeI32x4Eq: - result = []bool{ - uint32(x1Lo>>0) == uint32(x2Lo>>0), uint32(x1Lo>>32) == uint32(x2Lo>>32), - uint32(x1Hi>>0) == uint32(x2Hi>>0), uint32(x1Hi>>32) == uint32(x2Hi>>32), - } - case v128CmpTypeI32x4Ne: - result = []bool{ - uint32(x1Lo>>0) != uint32(x2Lo>>0), uint32(x1Lo>>32) != uint32(x2Lo>>32), - uint32(x1Hi>>0) != uint32(x2Hi>>0), uint32(x1Hi>>32) != uint32(x2Hi>>32), - } - case v128CmpTypeI32x4LtS: - result = []bool{ - int32(x1Lo>>0) < int32(x2Lo>>0), int32(x1Lo>>32) < int32(x2Lo>>32), - int32(x1Hi>>0) < int32(x2Hi>>0), int32(x1Hi>>32) < int32(x2Hi>>32), - } - case v128CmpTypeI32x4LtU: - result = []bool{ - uint32(x1Lo>>0) < uint32(x2Lo>>0), uint32(x1Lo>>32) < uint32(x2Lo>>32), - uint32(x1Hi>>0) < uint32(x2Hi>>0), uint32(x1Hi>>32) < uint32(x2Hi>>32), - } - case v128CmpTypeI32x4GtS: - result = []bool{ - int32(x1Lo>>0) > int32(x2Lo>>0), int32(x1Lo>>32) > int32(x2Lo>>32), - int32(x1Hi>>0) > int32(x2Hi>>0), int32(x1Hi>>32) > int32(x2Hi>>32), - } - case v128CmpTypeI32x4GtU: - result = []bool{ - uint32(x1Lo>>0) > uint32(x2Lo>>0), uint32(x1Lo>>32) > uint32(x2Lo>>32), - uint32(x1Hi>>0) > uint32(x2Hi>>0), uint32(x1Hi>>32) > uint32(x2Hi>>32), - } - case v128CmpTypeI32x4LeS: - result = []bool{ - int32(x1Lo>>0) <= int32(x2Lo>>0), int32(x1Lo>>32) <= int32(x2Lo>>32), - int32(x1Hi>>0) <= int32(x2Hi>>0), int32(x1Hi>>32) <= int32(x2Hi>>32), - } - case v128CmpTypeI32x4LeU: - result = []bool{ - uint32(x1Lo>>0) <= uint32(x2Lo>>0), uint32(x1Lo>>32) <= uint32(x2Lo>>32), - uint32(x1Hi>>0) <= uint32(x2Hi>>0), uint32(x1Hi>>32) <= uint32(x2Hi>>32), - } - case v128CmpTypeI32x4GeS: - result = []bool{ - int32(x1Lo>>0) >= int32(x2Lo>>0), int32(x1Lo>>32) >= int32(x2Lo>>32), - int32(x1Hi>>0) >= int32(x2Hi>>0), int32(x1Hi>>32) >= int32(x2Hi>>32), - } - case v128CmpTypeI32x4GeU: - result = []bool{ - uint32(x1Lo>>0) >= uint32(x2Lo>>0), uint32(x1Lo>>32) >= uint32(x2Lo>>32), - uint32(x1Hi>>0) >= uint32(x2Hi>>0), uint32(x1Hi>>32) >= uint32(x2Hi>>32), - } - case v128CmpTypeI64x2Eq: - result = []bool{x1Lo == x2Lo, x1Hi == x2Hi} - case v128CmpTypeI64x2Ne: - result = []bool{x1Lo != x2Lo, x1Hi != x2Hi} - case v128CmpTypeI64x2LtS: - result = []bool{int64(x1Lo) < int64(x2Lo), int64(x1Hi) < int64(x2Hi)} - case v128CmpTypeI64x2GtS: - result = []bool{int64(x1Lo) > int64(x2Lo), int64(x1Hi) > int64(x2Hi)} - case v128CmpTypeI64x2LeS: - result = []bool{int64(x1Lo) <= int64(x2Lo), int64(x1Hi) <= int64(x2Hi)} - case v128CmpTypeI64x2GeS: - result = []bool{int64(x1Lo) >= int64(x2Lo), int64(x1Hi) >= 
int64(x2Hi)} - case v128CmpTypeF32x4Eq: - result = []bool{ - math.Float32frombits(uint32(x1Lo>>0)) == math.Float32frombits(uint32(x2Lo>>0)), - math.Float32frombits(uint32(x1Lo>>32)) == math.Float32frombits(uint32(x2Lo>>32)), - math.Float32frombits(uint32(x1Hi>>0)) == math.Float32frombits(uint32(x2Hi>>0)), - math.Float32frombits(uint32(x1Hi>>32)) == math.Float32frombits(uint32(x2Hi>>32)), - } - case v128CmpTypeF32x4Ne: - result = []bool{ - math.Float32frombits(uint32(x1Lo>>0)) != math.Float32frombits(uint32(x2Lo>>0)), - math.Float32frombits(uint32(x1Lo>>32)) != math.Float32frombits(uint32(x2Lo>>32)), - math.Float32frombits(uint32(x1Hi>>0)) != math.Float32frombits(uint32(x2Hi>>0)), - math.Float32frombits(uint32(x1Hi>>32)) != math.Float32frombits(uint32(x2Hi>>32)), - } - case v128CmpTypeF32x4Lt: - result = []bool{ - math.Float32frombits(uint32(x1Lo>>0)) < math.Float32frombits(uint32(x2Lo>>0)), - math.Float32frombits(uint32(x1Lo>>32)) < math.Float32frombits(uint32(x2Lo>>32)), - math.Float32frombits(uint32(x1Hi>>0)) < math.Float32frombits(uint32(x2Hi>>0)), - math.Float32frombits(uint32(x1Hi>>32)) < math.Float32frombits(uint32(x2Hi>>32)), - } - case v128CmpTypeF32x4Gt: - result = []bool{ - math.Float32frombits(uint32(x1Lo>>0)) > math.Float32frombits(uint32(x2Lo>>0)), - math.Float32frombits(uint32(x1Lo>>32)) > math.Float32frombits(uint32(x2Lo>>32)), - math.Float32frombits(uint32(x1Hi>>0)) > math.Float32frombits(uint32(x2Hi>>0)), - math.Float32frombits(uint32(x1Hi>>32)) > math.Float32frombits(uint32(x2Hi>>32)), - } - case v128CmpTypeF32x4Le: - result = []bool{ - math.Float32frombits(uint32(x1Lo>>0)) <= math.Float32frombits(uint32(x2Lo>>0)), - math.Float32frombits(uint32(x1Lo>>32)) <= math.Float32frombits(uint32(x2Lo>>32)), - math.Float32frombits(uint32(x1Hi>>0)) <= math.Float32frombits(uint32(x2Hi>>0)), - math.Float32frombits(uint32(x1Hi>>32)) <= math.Float32frombits(uint32(x2Hi>>32)), - } - case v128CmpTypeF32x4Ge: - result = []bool{ - math.Float32frombits(uint32(x1Lo>>0)) >= math.Float32frombits(uint32(x2Lo>>0)), - math.Float32frombits(uint32(x1Lo>>32)) >= math.Float32frombits(uint32(x2Lo>>32)), - math.Float32frombits(uint32(x1Hi>>0)) >= math.Float32frombits(uint32(x2Hi>>0)), - math.Float32frombits(uint32(x1Hi>>32)) >= math.Float32frombits(uint32(x2Hi>>32)), - } - case v128CmpTypeF64x2Eq: - result = []bool{ - math.Float64frombits(x1Lo) == math.Float64frombits(x2Lo), - math.Float64frombits(x1Hi) == math.Float64frombits(x2Hi), - } - case v128CmpTypeF64x2Ne: - result = []bool{ - math.Float64frombits(x1Lo) != math.Float64frombits(x2Lo), - math.Float64frombits(x1Hi) != math.Float64frombits(x2Hi), - } - case v128CmpTypeF64x2Lt: - result = []bool{ - math.Float64frombits(x1Lo) < math.Float64frombits(x2Lo), - math.Float64frombits(x1Hi) < math.Float64frombits(x2Hi), - } - case v128CmpTypeF64x2Gt: - result = []bool{ - math.Float64frombits(x1Lo) > math.Float64frombits(x2Lo), - math.Float64frombits(x1Hi) > math.Float64frombits(x2Hi), - } - case v128CmpTypeF64x2Le: - result = []bool{ - math.Float64frombits(x1Lo) <= math.Float64frombits(x2Lo), - math.Float64frombits(x1Hi) <= math.Float64frombits(x2Hi), - } - case v128CmpTypeF64x2Ge: - result = []bool{ - math.Float64frombits(x1Lo) >= math.Float64frombits(x2Lo), - math.Float64frombits(x1Hi) >= math.Float64frombits(x2Hi), - } - } - - var retLo, retHi uint64 - laneNum := len(result) - switch laneNum { - case 16: - for i, b := range result { - if b { - if i < 8 { - retLo |= 0xff << (i * 8) - } else { - retHi |= 0xff << ((i - 8) * 8) - } - } - } - case 8: - for i, b 
:= range result { - if b { - if i < 4 { - retLo |= 0xffff << (i * 16) - } else { - retHi |= 0xffff << ((i - 4) * 16) - } - } - } - case 4: - for i, b := range result { - if b { - if i < 2 { - retLo |= 0xffff_ffff << (i * 32) - } else { - retHi |= 0xffff_ffff << ((i - 2) * 32) - } - } - } - case 2: - if result[0] { - retLo = ^uint64(0) - } - if result[1] { - retHi = ^uint64(0) - } - } - - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128AddSat: - x2hi, x2Lo := ce.popValue(), ce.popValue() - x1hi, x1Lo := ce.popValue(), ce.popValue() - - var retLo, retHi uint64 - - // Lane-wise addition while saturating the overflowing values. - // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-addition - switch op.B1 { - case shapeI8x16: - for i := 0; i < 16; i++ { - var v, w byte - if i < 8 { - v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8)) - } else { - v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8)) - } - - var uv uint64 - if op.B3 { // signed - if subbed := int64(int8(v)) + int64(int8(w)); subbed < math.MinInt8 { - uv = uint64(byte(0x80)) - } else if subbed > math.MaxInt8 { - uv = uint64(byte(0x7f)) - } else { - uv = uint64(byte(int8(subbed))) - } - } else { - if subbed := int64(v) + int64(w); subbed < 0 { - uv = uint64(byte(0)) - } else if subbed > math.MaxUint8 { - uv = uint64(byte(0xff)) - } else { - uv = uint64(byte(subbed)) - } - } - - if i < 8 { // first 8 lanes are on lower 64bits. - retLo |= uv << (i * 8) - } else { - retHi |= uv << ((i - 8) * 8) - } - } - case shapeI16x8: - for i := 0; i < 8; i++ { - var v, w uint16 - if i < 4 { - v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16)) - } else { - v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16)) - } - - var uv uint64 - if op.B3 { // signed - if added := int64(int16(v)) + int64(int16(w)); added < math.MinInt16 { - uv = uint64(uint16(0x8000)) - } else if added > math.MaxInt16 { - uv = uint64(uint16(0x7fff)) - } else { - uv = uint64(uint16(int16(added))) - } - } else { - if added := int64(v) + int64(w); added < 0 { - uv = uint64(uint16(0)) - } else if added > math.MaxUint16 { - uv = uint64(uint16(0xffff)) - } else { - uv = uint64(uint16(added)) - } - } - - if i < 4 { // first 4 lanes are on lower 64bits. - retLo |= uv << (i * 16) - } else { - retHi |= uv << ((i - 4) * 16) - } - } - } - - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128SubSat: - x2hi, x2Lo := ce.popValue(), ce.popValue() - x1hi, x1Lo := ce.popValue(), ce.popValue() - - var retLo, retHi uint64 - - // Lane-wise subtraction while saturating the overflowing values. 
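As the AddSat case above shows, and the SubSat case below mirrors, lane-wise saturating arithmetic is done by widening to a larger integer type, computing the exact result, and clamping it to the lane's range. A minimal sketch for one signed 8-bit lane, using a hypothetical helper name (satAddI8 does not exist in this package):

package main

import (
	"fmt"
	"math"
)

// satAddI8 adds two signed 8-bit lanes with saturation: the exact sum is
// computed in a wider type and then clamped to [math.MinInt8, math.MaxInt8].
func satAddI8(a, b int8) int8 {
	sum := int16(a) + int16(b)
	if sum < math.MinInt8 {
		return math.MinInt8
	}
	if sum > math.MaxInt8 {
		return math.MaxInt8
	}
	return int8(sum)
}

func main() {
	fmt.Println(satAddI8(100, 100))   // 127, not the -56 that wrapping int8 addition would give
	fmt.Println(satAddI8(-100, -100)) // -128
}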
- // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-subtraction - switch op.B1 { - case shapeI8x16: - for i := 0; i < 16; i++ { - var v, w byte - if i < 8 { - v, w = byte(x1Lo>>(i*8)), byte(x2Lo>>(i*8)) - } else { - v, w = byte(x1hi>>((i-8)*8)), byte(x2hi>>((i-8)*8)) - } - - var uv uint64 - if op.B3 { // signed - if subbed := int64(int8(v)) - int64(int8(w)); subbed < math.MinInt8 { - uv = uint64(byte(0x80)) - } else if subbed > math.MaxInt8 { - uv = uint64(byte(0x7f)) - } else { - uv = uint64(byte(int8(subbed))) - } - } else { - if subbed := int64(v) - int64(w); subbed < 0 { - uv = uint64(byte(0)) - } else if subbed > math.MaxUint8 { - uv = uint64(byte(0xff)) - } else { - uv = uint64(byte(subbed)) - } - } - - if i < 8 { - retLo |= uv << (i * 8) - } else { - retHi |= uv << ((i - 8) * 8) - } - } - case shapeI16x8: - for i := 0; i < 8; i++ { - var v, w uint16 - if i < 4 { - v, w = uint16(x1Lo>>(i*16)), uint16(x2Lo>>(i*16)) - } else { - v, w = uint16(x1hi>>((i-4)*16)), uint16(x2hi>>((i-4)*16)) - } - - var uv uint64 - if op.B3 { // signed - if subbed := int64(int16(v)) - int64(int16(w)); subbed < math.MinInt16 { - uv = uint64(uint16(0x8000)) - } else if subbed > math.MaxInt16 { - uv = uint64(uint16(0x7fff)) - } else { - uv = uint64(uint16(int16(subbed))) - } - } else { - if subbed := int64(v) - int64(w); subbed < 0 { - uv = uint64(uint16(0)) - } else if subbed > math.MaxUint16 { - uv = uint64(uint16(0xffff)) - } else { - uv = uint64(uint16(subbed)) - } - } - - if i < 4 { - retLo |= uv << (i * 16) - } else { - retHi |= uv << ((i - 4) * 16) - } - } - } - - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128Mul: - x2hi, x2lo := ce.popValue(), ce.popValue() - x1hi, x1lo := ce.popValue(), ce.popValue() - var retLo, retHi uint64 - switch op.B1 { - case shapeI16x8: - retHi = uint64(uint16(x1hi)*uint16(x2hi)) | (uint64(uint16(x1hi>>16)*uint16(x2hi>>16)) << 16) | - (uint64(uint16(x1hi>>32)*uint16(x2hi>>32)) << 32) | (uint64(uint16(x1hi>>48)*uint16(x2hi>>48)) << 48) - retLo = uint64(uint16(x1lo)*uint16(x2lo)) | (uint64(uint16(x1lo>>16)*uint16(x2lo>>16)) << 16) | - (uint64(uint16(x1lo>>32)*uint16(x2lo>>32)) << 32) | (uint64(uint16(x1lo>>48)*uint16(x2lo>>48)) << 48) - case shapeI32x4: - retHi = uint64(uint32(x1hi)*uint32(x2hi)) | (uint64(uint32(x1hi>>32)*uint32(x2hi>>32)) << 32) - retLo = uint64(uint32(x1lo)*uint32(x2lo)) | (uint64(uint32(x1lo>>32)*uint32(x2lo>>32)) << 32) - case shapeI64x2: - retHi = x1hi * x2hi - retLo = x1lo * x2lo - case shapeF32x4: - retHi = mulFloat32bits(uint32(x1hi), uint32(x2hi)) | mulFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32 - retLo = mulFloat32bits(uint32(x1lo), uint32(x2lo)) | mulFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32 - case shapeF64x2: - retHi = math.Float64bits(math.Float64frombits(x1hi) * math.Float64frombits(x2hi)) - retLo = math.Float64bits(math.Float64frombits(x1lo) * math.Float64frombits(x2lo)) - } - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128Div: - x2hi, x2lo := ce.popValue(), ce.popValue() - x1hi, x1lo := ce.popValue(), ce.popValue() - var retLo, retHi uint64 - if op.B1 == shapeF64x2 { - retHi = math.Float64bits(math.Float64frombits(x1hi) / math.Float64frombits(x2hi)) - retLo = math.Float64bits(math.Float64frombits(x1lo) / math.Float64frombits(x2lo)) - } else { - retHi = divFloat32bits(uint32(x1hi), uint32(x2hi)) | divFloat32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32 - retLo = divFloat32bits(uint32(x1lo), uint32(x2lo)) | 
divFloat32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32 - } - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128Neg: - hi, lo := ce.popValue(), ce.popValue() - switch op.B1 { - case shapeI8x16: - lo = uint64(-byte(lo)) | (uint64(-byte(lo>>8)) << 8) | - (uint64(-byte(lo>>16)) << 16) | (uint64(-byte(lo>>24)) << 24) | - (uint64(-byte(lo>>32)) << 32) | (uint64(-byte(lo>>40)) << 40) | - (uint64(-byte(lo>>48)) << 48) | (uint64(-byte(lo>>56)) << 56) - hi = uint64(-byte(hi)) | (uint64(-byte(hi>>8)) << 8) | - (uint64(-byte(hi>>16)) << 16) | (uint64(-byte(hi>>24)) << 24) | - (uint64(-byte(hi>>32)) << 32) | (uint64(-byte(hi>>40)) << 40) | - (uint64(-byte(hi>>48)) << 48) | (uint64(-byte(hi>>56)) << 56) - case shapeI16x8: - hi = uint64(-uint16(hi)) | (uint64(-uint16(hi>>16)) << 16) | - (uint64(-uint16(hi>>32)) << 32) | (uint64(-uint16(hi>>48)) << 48) - lo = uint64(-uint16(lo)) | (uint64(-uint16(lo>>16)) << 16) | - (uint64(-uint16(lo>>32)) << 32) | (uint64(-uint16(lo>>48)) << 48) - case shapeI32x4: - hi = uint64(-uint32(hi)) | (uint64(-uint32(hi>>32)) << 32) - lo = uint64(-uint32(lo)) | (uint64(-uint32(lo>>32)) << 32) - case shapeI64x2: - hi = -hi - lo = -lo - case shapeF32x4: - hi = uint64(math.Float32bits(-math.Float32frombits(uint32(hi)))) | - (uint64(math.Float32bits(-math.Float32frombits(uint32(hi>>32)))) << 32) - lo = uint64(math.Float32bits(-math.Float32frombits(uint32(lo)))) | - (uint64(math.Float32bits(-math.Float32frombits(uint32(lo>>32)))) << 32) - case shapeF64x2: - hi = math.Float64bits(-math.Float64frombits(hi)) - lo = math.Float64bits(-math.Float64frombits(lo)) - } - ce.pushValue(lo) - ce.pushValue(hi) - frame.pc++ - case operationKindV128Sqrt: - hi, lo := ce.popValue(), ce.popValue() - if op.B1 == shapeF64x2 { - hi = math.Float64bits(math.Sqrt(math.Float64frombits(hi))) - lo = math.Float64bits(math.Sqrt(math.Float64frombits(lo))) - } else { - hi = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi))))))) | - (uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(hi>>32))))))) << 32) - lo = uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo))))))) | - (uint64(math.Float32bits(float32(math.Sqrt(float64(math.Float32frombits(uint32(lo>>32))))))) << 32) - } - ce.pushValue(lo) - ce.pushValue(hi) - frame.pc++ - case operationKindV128Abs: - hi, lo := ce.popValue(), ce.popValue() - switch op.B1 { - case shapeI8x16: - lo = uint64(i8Abs(byte(lo))) | (uint64(i8Abs(byte(lo>>8))) << 8) | - (uint64(i8Abs(byte(lo>>16))) << 16) | (uint64(i8Abs(byte(lo>>24))) << 24) | - (uint64(i8Abs(byte(lo>>32))) << 32) | (uint64(i8Abs(byte(lo>>40))) << 40) | - (uint64(i8Abs(byte(lo>>48))) << 48) | (uint64(i8Abs(byte(lo>>56))) << 56) - hi = uint64(i8Abs(byte(hi))) | (uint64(i8Abs(byte(hi>>8))) << 8) | - (uint64(i8Abs(byte(hi>>16))) << 16) | (uint64(i8Abs(byte(hi>>24))) << 24) | - (uint64(i8Abs(byte(hi>>32))) << 32) | (uint64(i8Abs(byte(hi>>40))) << 40) | - (uint64(i8Abs(byte(hi>>48))) << 48) | (uint64(i8Abs(byte(hi>>56))) << 56) - case shapeI16x8: - hi = uint64(i16Abs(uint16(hi))) | (uint64(i16Abs(uint16(hi>>16))) << 16) | - (uint64(i16Abs(uint16(hi>>32))) << 32) | (uint64(i16Abs(uint16(hi>>48))) << 48) - lo = uint64(i16Abs(uint16(lo))) | (uint64(i16Abs(uint16(lo>>16))) << 16) | - (uint64(i16Abs(uint16(lo>>32))) << 32) | (uint64(i16Abs(uint16(lo>>48))) << 48) - case shapeI32x4: - hi = uint64(i32Abs(uint32(hi))) | (uint64(i32Abs(uint32(hi>>32))) << 32) - lo = uint64(i32Abs(uint32(lo))) | 
(uint64(i32Abs(uint32(lo>>32))) << 32) - case shapeI64x2: - if int64(hi) < 0 { - hi = -hi - } - if int64(lo) < 0 { - lo = -lo - } - case shapeF32x4: - hi = hi &^ (1<<31 | 1<<63) - lo = lo &^ (1<<31 | 1<<63) - case shapeF64x2: - hi = hi &^ (1 << 63) - lo = lo &^ (1 << 63) - } - ce.pushValue(lo) - ce.pushValue(hi) - frame.pc++ - case operationKindV128Popcnt: - hi, lo := ce.popValue(), ce.popValue() - var retLo, retHi uint64 - for i := 0; i < 16; i++ { - var v byte - if i < 8 { - v = byte(lo >> (i * 8)) - } else { - v = byte(hi >> ((i - 8) * 8)) - } - - var cnt uint64 - for i := 0; i < 8; i++ { - if (v>>i)&0b1 != 0 { - cnt++ - } - } - - if i < 8 { - retLo |= cnt << (i * 8) - } else { - retHi |= cnt << ((i - 8) * 8) - } - } - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128Min: - x2hi, x2lo := ce.popValue(), ce.popValue() - x1hi, x1lo := ce.popValue(), ce.popValue() - var retLo, retHi uint64 - switch op.B1 { - case shapeI8x16: - if op.B3 { // signed - retLo = uint64(i8MinS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinS(uint8(x1lo), uint8(x2lo))) | - uint64(i8MinS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 | - uint64(i8MinS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 | - uint64(i8MinS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48 - retHi = uint64(i8MinS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinS(uint8(x1hi), uint8(x2hi))) | - uint64(i8MinS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 | - uint64(i8MinS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 | - uint64(i8MinS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48 - } else { - retLo = uint64(i8MinU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MinU(uint8(x1lo), uint8(x2lo))) | - uint64(i8MinU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MinU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 | - uint64(i8MinU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MinU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 | - uint64(i8MinU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MinU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48 - retHi = uint64(i8MinU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MinU(uint8(x1hi), uint8(x2hi))) | - uint64(i8MinU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MinU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 | - uint64(i8MinU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MinU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 | - uint64(i8MinU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MinU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48 - } - case shapeI16x8: - if op.B3 { // signed - retLo = uint64(i16MinS(uint16(x1lo), uint16(x2lo))) | - uint64(i16MinS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 | - uint64(i16MinS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 | - uint64(i16MinS(uint16(x1lo>>48), uint16(x2lo>>48)))<<48 - retHi = uint64(i16MinS(uint16(x1hi), uint16(x2hi))) | - uint64(i16MinS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 | - uint64(i16MinS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 | - uint64(i16MinS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48 - } else { - retLo = uint64(i16MinU(uint16(x1lo), uint16(x2lo))) | - uint64(i16MinU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 | - uint64(i16MinU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 | - uint64(i16MinU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48 - retHi = 
uint64(i16MinU(uint16(x1hi), uint16(x2hi))) | - uint64(i16MinU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 | - uint64(i16MinU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 | - uint64(i16MinU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48 - } - case shapeI32x4: - if op.B3 { // signed - retLo = uint64(i32MinS(uint32(x1lo), uint32(x2lo))) | - uint64(i32MinS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32 - retHi = uint64(i32MinS(uint32(x1hi), uint32(x2hi))) | - uint64(i32MinS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32 - } else { - retLo = uint64(i32MinU(uint32(x1lo), uint32(x2lo))) | - uint64(i32MinU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32 - retHi = uint64(i32MinU(uint32(x1hi), uint32(x2hi))) | - uint64(i32MinU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32 - } - case shapeF32x4: - retHi = wasmCompatMin32bits(uint32(x1hi), uint32(x2hi)) | - wasmCompatMin32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32 - retLo = wasmCompatMin32bits(uint32(x1lo), uint32(x2lo)) | - wasmCompatMin32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32 - case shapeF64x2: - retHi = math.Float64bits(moremath.WasmCompatMin64( - math.Float64frombits(x1hi), - math.Float64frombits(x2hi), - )) - retLo = math.Float64bits(moremath.WasmCompatMin64( - math.Float64frombits(x1lo), - math.Float64frombits(x2lo), - )) - } - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128Max: - x2hi, x2lo := ce.popValue(), ce.popValue() - x1hi, x1lo := ce.popValue(), ce.popValue() - var retLo, retHi uint64 - switch op.B1 { - case shapeI8x16: - if op.B3 { // signed - retLo = uint64(i8MaxS(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxS(uint8(x1lo), uint8(x2lo))) | - uint64(i8MaxS(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxS(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 | - uint64(i8MaxS(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxS(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 | - uint64(i8MaxS(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxS(uint8(x1lo>>48), uint8(x2lo>>48)))<<48 - retHi = uint64(i8MaxS(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxS(uint8(x1hi), uint8(x2hi))) | - uint64(i8MaxS(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxS(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 | - uint64(i8MaxS(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxS(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 | - uint64(i8MaxS(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxS(uint8(x1hi>>48), uint8(x2hi>>48)))<<48 - } else { - retLo = uint64(i8MaxU(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8MaxU(uint8(x1lo), uint8(x2lo))) | - uint64(i8MaxU(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8MaxU(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 | - uint64(i8MaxU(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8MaxU(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 | - uint64(i8MaxU(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8MaxU(uint8(x1lo>>48), uint8(x2lo>>48)))<<48 - retHi = uint64(i8MaxU(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8MaxU(uint8(x1hi), uint8(x2hi))) | - uint64(i8MaxU(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8MaxU(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 | - uint64(i8MaxU(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8MaxU(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 | - uint64(i8MaxU(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8MaxU(uint8(x1hi>>48), uint8(x2hi>>48)))<<48 - } - case shapeI16x8: - if op.B3 { // signed - retLo = uint64(i16MaxS(uint16(x1lo), uint16(x2lo))) | - uint64(i16MaxS(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 | - uint64(i16MaxS(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 | - uint64(i16MaxS(uint16(x1lo>>48), 
uint16(x2lo>>48)))<<48 - retHi = uint64(i16MaxS(uint16(x1hi), uint16(x2hi))) | - uint64(i16MaxS(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 | - uint64(i16MaxS(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 | - uint64(i16MaxS(uint16(x1hi>>48), uint16(x2hi>>48)))<<48 - } else { - retLo = uint64(i16MaxU(uint16(x1lo), uint16(x2lo))) | - uint64(i16MaxU(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 | - uint64(i16MaxU(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 | - uint64(i16MaxU(uint16(x1lo>>48), uint16(x2lo>>48)))<<48 - retHi = uint64(i16MaxU(uint16(x1hi), uint16(x2hi))) | - uint64(i16MaxU(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 | - uint64(i16MaxU(uint16(x1hi>>32), uint16(x2hi>>32)))<<32 | - uint64(i16MaxU(uint16(x1hi>>48), uint16(x2hi>>48)))<<48 - } - case shapeI32x4: - if op.B3 { // signed - retLo = uint64(i32MaxS(uint32(x1lo), uint32(x2lo))) | - uint64(i32MaxS(uint32(x1lo>>32), uint32(x2lo>>32)))<<32 - retHi = uint64(i32MaxS(uint32(x1hi), uint32(x2hi))) | - uint64(i32MaxS(uint32(x1hi>>32), uint32(x2hi>>32)))<<32 - } else { - retLo = uint64(i32MaxU(uint32(x1lo), uint32(x2lo))) | - uint64(i32MaxU(uint32(x1lo>>32), uint32(x2lo>>32)))<<32 - retHi = uint64(i32MaxU(uint32(x1hi), uint32(x2hi))) | - uint64(i32MaxU(uint32(x1hi>>32), uint32(x2hi>>32)))<<32 - } - case shapeF32x4: - retHi = wasmCompatMax32bits(uint32(x1hi), uint32(x2hi)) | - wasmCompatMax32bits(uint32(x1hi>>32), uint32(x2hi>>32))<<32 - retLo = wasmCompatMax32bits(uint32(x1lo), uint32(x2lo)) | - wasmCompatMax32bits(uint32(x1lo>>32), uint32(x2lo>>32))<<32 - case shapeF64x2: - retHi = math.Float64bits(moremath.WasmCompatMax64( - math.Float64frombits(x1hi), - math.Float64frombits(x2hi), - )) - retLo = math.Float64bits(moremath.WasmCompatMax64( - math.Float64frombits(x1lo), - math.Float64frombits(x2lo), - )) - } - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128AvgrU: - x2hi, x2lo := ce.popValue(), ce.popValue() - x1hi, x1lo := ce.popValue(), ce.popValue() - var retLo, retHi uint64 - switch op.B1 { - case shapeI8x16: - retLo = uint64(i8RoundingAverage(uint8(x1lo>>8), uint8(x2lo>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1lo), uint8(x2lo))) | - uint64(i8RoundingAverage(uint8(x1lo>>24), uint8(x2lo>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1lo>>16), uint8(x2lo>>16)))<<16 | - uint64(i8RoundingAverage(uint8(x1lo>>40), uint8(x2lo>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1lo>>32), uint8(x2lo>>32)))<<32 | - uint64(i8RoundingAverage(uint8(x1lo>>56), uint8(x2lo>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1lo>>48), uint8(x2lo>>48)))<<48 - retHi = uint64(i8RoundingAverage(uint8(x1hi>>8), uint8(x2hi>>8)))<<8 | uint64(i8RoundingAverage(uint8(x1hi), uint8(x2hi))) | - uint64(i8RoundingAverage(uint8(x1hi>>24), uint8(x2hi>>24)))<<24 | uint64(i8RoundingAverage(uint8(x1hi>>16), uint8(x2hi>>16)))<<16 | - uint64(i8RoundingAverage(uint8(x1hi>>40), uint8(x2hi>>40)))<<40 | uint64(i8RoundingAverage(uint8(x1hi>>32), uint8(x2hi>>32)))<<32 | - uint64(i8RoundingAverage(uint8(x1hi>>56), uint8(x2hi>>56)))<<56 | uint64(i8RoundingAverage(uint8(x1hi>>48), uint8(x2hi>>48)))<<48 - case shapeI16x8: - retLo = uint64(i16RoundingAverage(uint16(x1lo), uint16(x2lo))) | - uint64(i16RoundingAverage(uint16(x1lo>>16), uint16(x2lo>>16)))<<16 | - uint64(i16RoundingAverage(uint16(x1lo>>32), uint16(x2lo>>32)))<<32 | - uint64(i16RoundingAverage(uint16(x1lo>>48), uint16(x2lo>>48)))<<48 - retHi = uint64(i16RoundingAverage(uint16(x1hi), uint16(x2hi))) | - uint64(i16RoundingAverage(uint16(x1hi>>16), uint16(x2hi>>16)))<<16 | - uint64(i16RoundingAverage(uint16(x1hi>>32), 
uint16(x2hi>>32)))<<32 | - uint64(i16RoundingAverage(uint16(x1hi>>48), uint16(x2hi>>48)))<<48 - } - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128Pmin: - x2hi, x2lo := ce.popValue(), ce.popValue() - x1hi, x1lo := ce.popValue(), ce.popValue() - var retLo, retHi uint64 - if op.B1 == shapeF32x4 { - if flt32(math.Float32frombits(uint32(x2lo)), math.Float32frombits(uint32(x1lo))) { - retLo = x2lo & 0x00000000_ffffffff - } else { - retLo = x1lo & 0x00000000_ffffffff - } - if flt32(math.Float32frombits(uint32(x2lo>>32)), math.Float32frombits(uint32(x1lo>>32))) { - retLo |= x2lo & 0xffffffff_00000000 - } else { - retLo |= x1lo & 0xffffffff_00000000 - } - if flt32(math.Float32frombits(uint32(x2hi)), math.Float32frombits(uint32(x1hi))) { - retHi = x2hi & 0x00000000_ffffffff - } else { - retHi = x1hi & 0x00000000_ffffffff - } - if flt32(math.Float32frombits(uint32(x2hi>>32)), math.Float32frombits(uint32(x1hi>>32))) { - retHi |= x2hi & 0xffffffff_00000000 - } else { - retHi |= x1hi & 0xffffffff_00000000 - } - } else { - if flt64(math.Float64frombits(x2lo), math.Float64frombits(x1lo)) { - retLo = x2lo - } else { - retLo = x1lo - } - if flt64(math.Float64frombits(x2hi), math.Float64frombits(x1hi)) { - retHi = x2hi - } else { - retHi = x1hi - } - } - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128Pmax: - x2hi, x2lo := ce.popValue(), ce.popValue() - x1hi, x1lo := ce.popValue(), ce.popValue() - var retLo, retHi uint64 - if op.B1 == shapeF32x4 { - if flt32(math.Float32frombits(uint32(x1lo)), math.Float32frombits(uint32(x2lo))) { - retLo = x2lo & 0x00000000_ffffffff - } else { - retLo = x1lo & 0x00000000_ffffffff - } - if flt32(math.Float32frombits(uint32(x1lo>>32)), math.Float32frombits(uint32(x2lo>>32))) { - retLo |= x2lo & 0xffffffff_00000000 - } else { - retLo |= x1lo & 0xffffffff_00000000 - } - if flt32(math.Float32frombits(uint32(x1hi)), math.Float32frombits(uint32(x2hi))) { - retHi = x2hi & 0x00000000_ffffffff - } else { - retHi = x1hi & 0x00000000_ffffffff - } - if flt32(math.Float32frombits(uint32(x1hi>>32)), math.Float32frombits(uint32(x2hi>>32))) { - retHi |= x2hi & 0xffffffff_00000000 - } else { - retHi |= x1hi & 0xffffffff_00000000 - } - } else { - if flt64(math.Float64frombits(x1lo), math.Float64frombits(x2lo)) { - retLo = x2lo - } else { - retLo = x1lo - } - if flt64(math.Float64frombits(x1hi), math.Float64frombits(x2hi)) { - retHi = x2hi - } else { - retHi = x1hi - } - } - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128Ceil: - hi, lo := ce.popValue(), ce.popValue() - if op.B1 == shapeF32x4 { - lo = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo))))) | - (uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(lo>>32))))) << 32) - hi = uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi))))) | - (uint64(math.Float32bits(moremath.WasmCompatCeilF32(math.Float32frombits(uint32(hi>>32))))) << 32) - } else { - lo = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(lo))) - hi = math.Float64bits(moremath.WasmCompatCeilF64(math.Float64frombits(hi))) - } - ce.pushValue(lo) - ce.pushValue(hi) - frame.pc++ - case operationKindV128Floor: - hi, lo := ce.popValue(), ce.popValue() - if op.B1 == shapeF32x4 { - lo = uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo))))) | - (uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(lo>>32))))) << 32) - hi = 
uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi))))) | - (uint64(math.Float32bits(moremath.WasmCompatFloorF32(math.Float32frombits(uint32(hi>>32))))) << 32) - } else { - lo = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(lo))) - hi = math.Float64bits(moremath.WasmCompatFloorF64(math.Float64frombits(hi))) - } - ce.pushValue(lo) - ce.pushValue(hi) - frame.pc++ - case operationKindV128Trunc: - hi, lo := ce.popValue(), ce.popValue() - if op.B1 == shapeF32x4 { - lo = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo))))) | - (uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(lo>>32))))) << 32) - hi = uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi))))) | - (uint64(math.Float32bits(moremath.WasmCompatTruncF32(math.Float32frombits(uint32(hi>>32))))) << 32) - } else { - lo = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(lo))) - hi = math.Float64bits(moremath.WasmCompatTruncF64(math.Float64frombits(hi))) - } - ce.pushValue(lo) - ce.pushValue(hi) - frame.pc++ - case operationKindV128Nearest: - hi, lo := ce.popValue(), ce.popValue() - if op.B1 == shapeF32x4 { - lo = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo))))) | - (uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(lo>>32))))) << 32) - hi = uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi))))) | - (uint64(math.Float32bits(moremath.WasmCompatNearestF32(math.Float32frombits(uint32(hi>>32))))) << 32) - } else { - lo = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(lo))) - hi = math.Float64bits(moremath.WasmCompatNearestF64(math.Float64frombits(hi))) - } - ce.pushValue(lo) - ce.pushValue(hi) - frame.pc++ - case operationKindV128Extend: - hi, lo := ce.popValue(), ce.popValue() - var origin uint64 - if op.B3 { // use lower 64 bits - origin = lo - } else { - origin = hi - } - - signed := op.B2 == 1 - - var retHi, retLo uint64 - switch op.B1 { - case shapeI8x16: - for i := 0; i < 8; i++ { - v8 := byte(origin >> (i * 8)) - - var v16 uint16 - if signed { - v16 = uint16(int8(v8)) - } else { - v16 = uint16(v8) - } - - if i < 4 { - retLo |= uint64(v16) << (i * 16) - } else { - retHi |= uint64(v16) << ((i - 4) * 16) - } - } - case shapeI16x8: - for i := 0; i < 4; i++ { - v16 := uint16(origin >> (i * 16)) - - var v32 uint32 - if signed { - v32 = uint32(int16(v16)) - } else { - v32 = uint32(v16) - } - - if i < 2 { - retLo |= uint64(v32) << (i * 32) - } else { - retHi |= uint64(v32) << ((i - 2) * 32) - } - } - case shapeI32x4: - v32Lo := uint32(origin) - v32Hi := uint32(origin >> 32) - if signed { - retLo = uint64(int32(v32Lo)) - retHi = uint64(int32(v32Hi)) - } else { - retLo = uint64(v32Lo) - retHi = uint64(v32Hi) - } - } - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128ExtMul: - x2Hi, x2Lo := ce.popValue(), ce.popValue() - x1Hi, x1Lo := ce.popValue(), ce.popValue() - var x1, x2 uint64 - if op.B3 { // use lower 64 bits - x1, x2 = x1Lo, x2Lo - } else { - x1, x2 = x1Hi, x2Hi - } - - signed := op.B2 == 1 - - var retLo, retHi uint64 - switch op.B1 { - case shapeI8x16: - for i := 0; i < 8; i++ { - v1, v2 := byte(x1>>(i*8)), byte(x2>>(i*8)) - - var v16 uint16 - if signed { - v16 = uint16(int16(int8(v1)) * int16(int8(v2))) - } else { - v16 = uint16(v1) * uint16(v2) - } - - if i < 4 { - retLo |= uint64(v16) << (i * 16) - } else { - retHi |= 
uint64(v16) << ((i - 4) * 16) - } - } - case shapeI16x8: - for i := 0; i < 4; i++ { - v1, v2 := uint16(x1>>(i*16)), uint16(x2>>(i*16)) - - var v32 uint32 - if signed { - v32 = uint32(int32(int16(v1)) * int32(int16(v2))) - } else { - v32 = uint32(v1) * uint32(v2) - } - - if i < 2 { - retLo |= uint64(v32) << (i * 32) - } else { - retHi |= uint64(v32) << ((i - 2) * 32) - } - } - case shapeI32x4: - v1Lo, v2Lo := uint32(x1), uint32(x2) - v1Hi, v2Hi := uint32(x1>>32), uint32(x2>>32) - if signed { - retLo = uint64(int64(int32(v1Lo)) * int64(int32(v2Lo))) - retHi = uint64(int64(int32(v1Hi)) * int64(int32(v2Hi))) - } else { - retLo = uint64(v1Lo) * uint64(v2Lo) - retHi = uint64(v1Hi) * uint64(v2Hi) - } - } - - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128Q15mulrSatS: - x2hi, x2Lo := ce.popValue(), ce.popValue() - x1hi, x1Lo := ce.popValue(), ce.popValue() - var retLo, retHi uint64 - for i := 0; i < 8; i++ { - var v, w int16 - if i < 4 { - v, w = int16(uint16(x1Lo>>(i*16))), int16(uint16(x2Lo>>(i*16))) - } else { - v, w = int16(uint16(x1hi>>((i-4)*16))), int16(uint16(x2hi>>((i-4)*16))) - } - - var uv uint64 - // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#saturating-integer-q-format-rounding-multiplication - if calc := ((int32(v) * int32(w)) + 0x4000) >> 15; calc < math.MinInt16 { - uv = uint64(uint16(0x8000)) - } else if calc > math.MaxInt16 { - uv = uint64(uint16(0x7fff)) - } else { - uv = uint64(uint16(int16(calc))) - } - - if i < 4 { - retLo |= uv << (i * 16) - } else { - retHi |= uv << ((i - 4) * 16) - } - } - - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128ExtAddPairwise: - hi, lo := ce.popValue(), ce.popValue() - - signed := op.B3 - - var retLo, retHi uint64 - switch op.B1 { - case shapeI8x16: - for i := 0; i < 8; i++ { - var v1, v2 byte - if i < 4 { - v1, v2 = byte(lo>>((i*2)*8)), byte(lo>>((i*2+1)*8)) - } else { - v1, v2 = byte(hi>>(((i-4)*2)*8)), byte(hi>>(((i-4)*2+1)*8)) - } - - var v16 uint16 - if signed { - v16 = uint16(int16(int8(v1)) + int16(int8(v2))) - } else { - v16 = uint16(v1) + uint16(v2) - } - - if i < 4 { - retLo |= uint64(v16) << (i * 16) - } else { - retHi |= uint64(v16) << ((i - 4) * 16) - } - } - case shapeI16x8: - for i := 0; i < 4; i++ { - var v1, v2 uint16 - if i < 2 { - v1, v2 = uint16(lo>>((i*2)*16)), uint16(lo>>((i*2+1)*16)) - } else { - v1, v2 = uint16(hi>>(((i-2)*2)*16)), uint16(hi>>(((i-2)*2+1)*16)) - } - - var v32 uint32 - if signed { - v32 = uint32(int32(int16(v1)) + int32(int16(v2))) - } else { - v32 = uint32(v1) + uint32(v2) - } - - if i < 2 { - retLo |= uint64(v32) << (i * 32) - } else { - retHi |= uint64(v32) << ((i - 2) * 32) - } - } - } - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128FloatPromote: - _, toPromote := ce.popValue(), ce.popValue() - ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote))))) - ce.pushValue(math.Float64bits(float64(math.Float32frombits(uint32(toPromote >> 32))))) - frame.pc++ - case operationKindV128FloatDemote: - hi, lo := ce.popValue(), ce.popValue() - ce.pushValue( - uint64(math.Float32bits(float32(math.Float64frombits(lo)))) | - (uint64(math.Float32bits(float32(math.Float64frombits(hi)))) << 32), - ) - ce.pushValue(0) - frame.pc++ - case operationKindV128FConvertFromI: - hi, lo := ce.popValue(), ce.popValue() - v1, v2, v3, v4 := uint32(lo), uint32(lo>>32), uint32(hi), uint32(hi>>32) - signed := op.B3 - - var retLo, retHi uint64 - switch op.B1 { // Destination shape. 
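The V128Q15mulrSatS case above computes ((a*b + 0x4000) >> 15) per signed 16-bit lane and saturates the result to the int16 range. A standalone sketch of that per-lane step, under my own helper name (q15MulrSat is not part of this file):

package main

import (
	"fmt"
	"math"
)

// q15MulrSat multiplies two signed Q15 fixed-point lanes with rounding
// (the +0x4000 before the >>15) and saturates to the int16 range.
func q15MulrSat(a, b int16) int16 {
	r := (int32(a)*int32(b) + 0x4000) >> 15
	if r > math.MaxInt16 {
		return math.MaxInt16
	}
	if r < math.MinInt16 {
		return math.MinInt16
	}
	return int16(r)
}

func main() {
	// -1.0 * -1.0 in Q15 is not representable, so the result saturates to 0x7fff.
	fmt.Println(q15MulrSat(math.MinInt16, math.MinInt16)) // 32767
}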
- case shapeF32x4: // f32x4 from signed/unsigned i32x4 - if signed { - retLo = uint64(math.Float32bits(float32(int32(v1)))) | - (uint64(math.Float32bits(float32(int32(v2)))) << 32) - retHi = uint64(math.Float32bits(float32(int32(v3)))) | - (uint64(math.Float32bits(float32(int32(v4)))) << 32) - } else { - retLo = uint64(math.Float32bits(float32(v1))) | - (uint64(math.Float32bits(float32(v2))) << 32) - retHi = uint64(math.Float32bits(float32(v3))) | - (uint64(math.Float32bits(float32(v4))) << 32) - } - case shapeF64x2: // f64x2 from signed/unsigned i32x4 - if signed { - retLo, retHi = math.Float64bits(float64(int32(v1))), math.Float64bits(float64(int32(v2))) - } else { - retLo, retHi = math.Float64bits(float64(v1)), math.Float64bits(float64(v2)) - } - } - - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128Narrow: - x2Hi, x2Lo := ce.popValue(), ce.popValue() - x1Hi, x1Lo := ce.popValue(), ce.popValue() - signed := op.B3 - - var retLo, retHi uint64 - switch op.B1 { - case shapeI16x8: // signed/unsigned i16x8 to i8x16 - for i := 0; i < 8; i++ { - var v16 uint16 - if i < 4 { - v16 = uint16(x1Lo >> (i * 16)) - } else { - v16 = uint16(x1Hi >> ((i - 4) * 16)) - } - - var v byte - if signed { - if s := int16(v16); s > math.MaxInt8 { - v = math.MaxInt8 - } else if s < math.MinInt8 { - s = math.MinInt8 - v = byte(s) - } else { - v = byte(v16) - } - } else { - if s := int16(v16); s > math.MaxUint8 { - v = math.MaxUint8 - } else if s < 0 { - v = 0 - } else { - v = byte(v16) - } - } - retLo |= uint64(v) << (i * 8) - } - for i := 0; i < 8; i++ { - var v16 uint16 - if i < 4 { - v16 = uint16(x2Lo >> (i * 16)) - } else { - v16 = uint16(x2Hi >> ((i - 4) * 16)) - } - - var v byte - if signed { - if s := int16(v16); s > math.MaxInt8 { - v = math.MaxInt8 - } else if s < math.MinInt8 { - s = math.MinInt8 - v = byte(s) - } else { - v = byte(v16) - } - } else { - if s := int16(v16); s > math.MaxUint8 { - v = math.MaxUint8 - } else if s < 0 { - v = 0 - } else { - v = byte(v16) - } - } - retHi |= uint64(v) << (i * 8) - } - case shapeI32x4: // signed/unsigned i32x4 to i16x8 - for i := 0; i < 4; i++ { - var v32 uint32 - if i < 2 { - v32 = uint32(x1Lo >> (i * 32)) - } else { - v32 = uint32(x1Hi >> ((i - 2) * 32)) - } - - var v uint16 - if signed { - if s := int32(v32); s > math.MaxInt16 { - v = math.MaxInt16 - } else if s < math.MinInt16 { - s = math.MinInt16 - v = uint16(s) - } else { - v = uint16(v32) - } - } else { - if s := int32(v32); s > math.MaxUint16 { - v = math.MaxUint16 - } else if s < 0 { - v = 0 - } else { - v = uint16(v32) - } - } - retLo |= uint64(v) << (i * 16) - } - - for i := 0; i < 4; i++ { - var v32 uint32 - if i < 2 { - v32 = uint32(x2Lo >> (i * 32)) - } else { - v32 = uint32(x2Hi >> ((i - 2) * 32)) - } - - var v uint16 - if signed { - if s := int32(v32); s > math.MaxInt16 { - v = math.MaxInt16 - } else if s < math.MinInt16 { - s = math.MinInt16 - v = uint16(s) - } else { - v = uint16(v32) - } - } else { - if s := int32(v32); s > math.MaxUint16 { - v = math.MaxUint16 - } else if s < 0 { - v = 0 - } else { - v = uint16(v32) - } - } - retHi |= uint64(v) << (i * 16) - } - } - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindV128Dot: - x2Hi, x2Lo := ce.popValue(), ce.popValue() - x1Hi, x1Lo := ce.popValue(), ce.popValue() - lo, hi := v128Dot(x1Hi, x1Lo, x2Hi, x2Lo) - ce.pushValue(lo) - ce.pushValue(hi) - frame.pc++ - case operationKindV128ITruncSatFromF: - hi, lo := ce.popValue(), ce.popValue() - signed := op.B3 - var retLo, retHi uint64 - - switch 
op.B1 { - case shapeF32x4: // f32x4 to i32x4 - for i, f64 := range [4]float64{ - math.Trunc(float64(math.Float32frombits(uint32(lo)))), - math.Trunc(float64(math.Float32frombits(uint32(lo >> 32)))), - math.Trunc(float64(math.Float32frombits(uint32(hi)))), - math.Trunc(float64(math.Float32frombits(uint32(hi >> 32)))), - } { - - var v uint32 - if math.IsNaN(f64) { - v = 0 - } else if signed { - if f64 < math.MinInt32 { - f64 = math.MinInt32 - } else if f64 > math.MaxInt32 { - f64 = math.MaxInt32 - } - v = uint32(int32(f64)) - } else { - if f64 < 0 { - f64 = 0 - } else if f64 > math.MaxUint32 { - f64 = math.MaxUint32 - } - v = uint32(f64) - } - - if i < 2 { - retLo |= uint64(v) << (i * 32) - } else { - retHi |= uint64(v) << ((i - 2) * 32) - } - } - - case shapeF64x2: // f64x2 to i32x4 - for i, f := range [2]float64{ - math.Trunc(math.Float64frombits(lo)), - math.Trunc(math.Float64frombits(hi)), - } { - var v uint32 - if math.IsNaN(f) { - v = 0 - } else if signed { - if f < math.MinInt32 { - f = math.MinInt32 - } else if f > math.MaxInt32 { - f = math.MaxInt32 - } - v = uint32(int32(f)) - } else { - if f < 0 { - f = 0 - } else if f > math.MaxUint32 { - f = math.MaxUint32 - } - v = uint32(f) - } - - retLo |= uint64(v) << (i * 32) - } - } - - ce.pushValue(retLo) - ce.pushValue(retHi) - frame.pc++ - case operationKindAtomicMemoryWait: - timeout := int64(ce.popValue()) - exp := ce.popValue() - offset := ce.popMemoryOffset(op) - // Runtime instead of validation error because the spec intends to allow binaries to include - // such instructions as long as they are not executed. - if !memoryInst.Shared { - panic(wasmruntime.ErrRuntimeExpectedSharedMemory) - } - - switch unsignedType(op.B1) { - case unsignedTypeI32: - if offset%4 != 0 { - panic(wasmruntime.ErrRuntimeUnalignedAtomic) - } - if int(offset) > len(memoryInst.Buffer)-4 { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue(memoryInst.Wait32(offset, uint32(exp), timeout, func(mem *wasm.MemoryInstance, offset uint32) uint32 { - mem.Mux.Lock() - defer mem.Mux.Unlock() - value, _ := mem.ReadUint32Le(offset) - return value - })) - case unsignedTypeI64: - if offset%8 != 0 { - panic(wasmruntime.ErrRuntimeUnalignedAtomic) - } - if int(offset) > len(memoryInst.Buffer)-8 { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue(memoryInst.Wait64(offset, exp, timeout, func(mem *wasm.MemoryInstance, offset uint32) uint64 { - mem.Mux.Lock() - defer mem.Mux.Unlock() - value, _ := mem.ReadUint64Le(offset) - return value - })) - } - frame.pc++ - case operationKindAtomicMemoryNotify: - count := ce.popValue() - offset := ce.popMemoryOffset(op) - if offset%4 != 0 { - panic(wasmruntime.ErrRuntimeUnalignedAtomic) - } - // Just a bounds check - if offset >= memoryInst.Size() { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - res := memoryInst.Notify(offset, uint32(count)) - ce.pushValue(uint64(res)) - frame.pc++ - case operationKindAtomicFence: - // Memory not required for fence only - if memoryInst != nil { - // An empty critical section can be used as a synchronization primitive, which is what - // fence is. Probably, there are no spectests or defined behavior to confirm this yet. 
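The atomic cases in this stretch all share the same guard before touching memory: the effective offset must be naturally aligned for the access width (otherwise ErrRuntimeUnalignedAtomic) and the full access window must fit in the memory buffer (otherwise an out-of-bounds error). A condensed sketch of that check for a 32-bit access, with hypothetical names and plain error values instead of the engine's panics:

package main

import (
	"errors"
	"fmt"
)

var (
	errUnaligned   = errors.New("unaligned atomic access")
	errOutOfBounds = errors.New("out of bounds memory access")
)

// checkAtomic32 mirrors the guard used before every 32-bit atomic access:
// the offset must be 4-byte aligned and the 4-byte window must fit in buf.
func checkAtomic32(buf []byte, offset uint32) error {
	if offset%4 != 0 {
		return errUnaligned
	}
	if int(offset) > len(buf)-4 {
		return errOutOfBounds
	}
	return nil
}

func main() {
	mem := make([]byte, 8)
	fmt.Println(checkAtomic32(mem, 2)) // unaligned atomic access
	fmt.Println(checkAtomic32(mem, 8)) // out of bounds memory access
	fmt.Println(checkAtomic32(mem, 4)) // <nil>
}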
- memoryInst.Mux.Lock() - memoryInst.Mux.Unlock() //nolint:staticcheck - } - frame.pc++ - case operationKindAtomicLoad: - offset := ce.popMemoryOffset(op) - switch unsignedType(op.B1) { - case unsignedTypeI32: - if offset%4 != 0 { - panic(wasmruntime.ErrRuntimeUnalignedAtomic) - } - memoryInst.Mux.Lock() - val, ok := memoryInst.ReadUint32Le(offset) - memoryInst.Mux.Unlock() - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue(uint64(val)) - case unsignedTypeI64: - if offset%8 != 0 { - panic(wasmruntime.ErrRuntimeUnalignedAtomic) - } - memoryInst.Mux.Lock() - val, ok := memoryInst.ReadUint64Le(offset) - memoryInst.Mux.Unlock() - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue(val) - } - frame.pc++ - case operationKindAtomicLoad8: - offset := ce.popMemoryOffset(op) - memoryInst.Mux.Lock() - val, ok := memoryInst.ReadByte(offset) - memoryInst.Mux.Unlock() - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue(uint64(val)) - frame.pc++ - case operationKindAtomicLoad16: - offset := ce.popMemoryOffset(op) - if offset%2 != 0 { - panic(wasmruntime.ErrRuntimeUnalignedAtomic) - } - memoryInst.Mux.Lock() - val, ok := memoryInst.ReadUint16Le(offset) - memoryInst.Mux.Unlock() - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - ce.pushValue(uint64(val)) - frame.pc++ - case operationKindAtomicStore: - val := ce.popValue() - offset := ce.popMemoryOffset(op) - switch unsignedType(op.B1) { - case unsignedTypeI32: - if offset%4 != 0 { - panic(wasmruntime.ErrRuntimeUnalignedAtomic) - } - memoryInst.Mux.Lock() - ok := memoryInst.WriteUint32Le(offset, uint32(val)) - memoryInst.Mux.Unlock() - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - case unsignedTypeI64: - if offset%8 != 0 { - panic(wasmruntime.ErrRuntimeUnalignedAtomic) - } - memoryInst.Mux.Lock() - ok := memoryInst.WriteUint64Le(offset, val) - memoryInst.Mux.Unlock() - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - } - frame.pc++ - case operationKindAtomicStore8: - val := byte(ce.popValue()) - offset := ce.popMemoryOffset(op) - memoryInst.Mux.Lock() - ok := memoryInst.WriteByte(offset, val) - memoryInst.Mux.Unlock() - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - frame.pc++ - case operationKindAtomicStore16: - val := uint16(ce.popValue()) - offset := ce.popMemoryOffset(op) - if offset%2 != 0 { - panic(wasmruntime.ErrRuntimeUnalignedAtomic) - } - memoryInst.Mux.Lock() - ok := memoryInst.WriteUint16Le(offset, val) - memoryInst.Mux.Unlock() - if !ok { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - frame.pc++ - case operationKindAtomicRMW: - val := ce.popValue() - offset := ce.popMemoryOffset(op) - switch unsignedType(op.B1) { - case unsignedTypeI32: - if offset%4 != 0 { - panic(wasmruntime.ErrRuntimeUnalignedAtomic) - } - memoryInst.Mux.Lock() - old, ok := memoryInst.ReadUint32Le(offset) - if !ok { - memoryInst.Mux.Unlock() - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - var newVal uint32 - switch atomicArithmeticOp(op.B2) { - case atomicArithmeticOpAdd: - newVal = old + uint32(val) - case atomicArithmeticOpSub: - newVal = old - uint32(val) - case atomicArithmeticOpAnd: - newVal = old & uint32(val) - case atomicArithmeticOpOr: - newVal = old | uint32(val) - case atomicArithmeticOpXor: - newVal = old ^ uint32(val) - case atomicArithmeticOpNop: - newVal = uint32(val) - } - memoryInst.WriteUint32Le(offset, newVal) - memoryInst.Mux.Unlock() - 
ce.pushValue(uint64(old)) - case unsignedTypeI64: - if offset%8 != 0 { - panic(wasmruntime.ErrRuntimeUnalignedAtomic) - } - memoryInst.Mux.Lock() - old, ok := memoryInst.ReadUint64Le(offset) - if !ok { - memoryInst.Mux.Unlock() - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - var newVal uint64 - switch atomicArithmeticOp(op.B2) { - case atomicArithmeticOpAdd: - newVal = old + val - case atomicArithmeticOpSub: - newVal = old - val - case atomicArithmeticOpAnd: - newVal = old & val - case atomicArithmeticOpOr: - newVal = old | val - case atomicArithmeticOpXor: - newVal = old ^ val - case atomicArithmeticOpNop: - newVal = val - } - memoryInst.WriteUint64Le(offset, newVal) - memoryInst.Mux.Unlock() - ce.pushValue(old) - } - frame.pc++ - case operationKindAtomicRMW8: - val := ce.popValue() - offset := ce.popMemoryOffset(op) - memoryInst.Mux.Lock() - old, ok := memoryInst.ReadByte(offset) - if !ok { - memoryInst.Mux.Unlock() - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - arg := byte(val) - var newVal byte - switch atomicArithmeticOp(op.B2) { - case atomicArithmeticOpAdd: - newVal = old + arg - case atomicArithmeticOpSub: - newVal = old - arg - case atomicArithmeticOpAnd: - newVal = old & arg - case atomicArithmeticOpOr: - newVal = old | arg - case atomicArithmeticOpXor: - newVal = old ^ arg - case atomicArithmeticOpNop: - newVal = arg - } - memoryInst.WriteByte(offset, newVal) - memoryInst.Mux.Unlock() - ce.pushValue(uint64(old)) - frame.pc++ - case operationKindAtomicRMW16: - val := ce.popValue() - offset := ce.popMemoryOffset(op) - if offset%2 != 0 { - panic(wasmruntime.ErrRuntimeUnalignedAtomic) - } - memoryInst.Mux.Lock() - old, ok := memoryInst.ReadUint16Le(offset) - if !ok { - memoryInst.Mux.Unlock() - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - arg := uint16(val) - var newVal uint16 - switch atomicArithmeticOp(op.B2) { - case atomicArithmeticOpAdd: - newVal = old + arg - case atomicArithmeticOpSub: - newVal = old - arg - case atomicArithmeticOpAnd: - newVal = old & arg - case atomicArithmeticOpOr: - newVal = old | arg - case atomicArithmeticOpXor: - newVal = old ^ arg - case atomicArithmeticOpNop: - newVal = arg - } - memoryInst.WriteUint16Le(offset, newVal) - memoryInst.Mux.Unlock() - ce.pushValue(uint64(old)) - frame.pc++ - case operationKindAtomicRMWCmpxchg: - rep := ce.popValue() - exp := ce.popValue() - offset := ce.popMemoryOffset(op) - switch unsignedType(op.B1) { - case unsignedTypeI32: - if offset%4 != 0 { - panic(wasmruntime.ErrRuntimeUnalignedAtomic) - } - memoryInst.Mux.Lock() - old, ok := memoryInst.ReadUint32Le(offset) - if !ok { - memoryInst.Mux.Unlock() - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - if old == uint32(exp) { - memoryInst.WriteUint32Le(offset, uint32(rep)) - } - memoryInst.Mux.Unlock() - ce.pushValue(uint64(old)) - case unsignedTypeI64: - if offset%8 != 0 { - panic(wasmruntime.ErrRuntimeUnalignedAtomic) - } - memoryInst.Mux.Lock() - old, ok := memoryInst.ReadUint64Le(offset) - if !ok { - memoryInst.Mux.Unlock() - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - if old == exp { - memoryInst.WriteUint64Le(offset, rep) - } - memoryInst.Mux.Unlock() - ce.pushValue(old) - } - frame.pc++ - case operationKindAtomicRMW8Cmpxchg: - rep := byte(ce.popValue()) - exp := byte(ce.popValue()) - offset := ce.popMemoryOffset(op) - memoryInst.Mux.Lock() - old, ok := memoryInst.ReadByte(offset) - if !ok { - memoryInst.Mux.Unlock() - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - if old == exp { - 
memoryInst.WriteByte(offset, rep) - } - memoryInst.Mux.Unlock() - ce.pushValue(uint64(old)) - frame.pc++ - case operationKindAtomicRMW16Cmpxchg: - rep := uint16(ce.popValue()) - exp := uint16(ce.popValue()) - offset := ce.popMemoryOffset(op) - if offset%2 != 0 { - panic(wasmruntime.ErrRuntimeUnalignedAtomic) - } - memoryInst.Mux.Lock() - old, ok := memoryInst.ReadUint16Le(offset) - if !ok { - memoryInst.Mux.Unlock() - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - if old == exp { - memoryInst.WriteUint16Le(offset, rep) - } - memoryInst.Mux.Unlock() - ce.pushValue(uint64(old)) - frame.pc++ - default: - frame.pc++ - } - } - ce.popFrame() -} - -func wasmCompatMax32bits(v1, v2 uint32) uint64 { - return uint64(math.Float32bits(moremath.WasmCompatMax32( - math.Float32frombits(v1), - math.Float32frombits(v2), - ))) -} - -func wasmCompatMin32bits(v1, v2 uint32) uint64 { - return uint64(math.Float32bits(moremath.WasmCompatMin32( - math.Float32frombits(v1), - math.Float32frombits(v2), - ))) -} - -func addFloat32bits(v1, v2 uint32) uint64 { - return uint64(math.Float32bits(math.Float32frombits(v1) + math.Float32frombits(v2))) -} - -func subFloat32bits(v1, v2 uint32) uint64 { - return uint64(math.Float32bits(math.Float32frombits(v1) - math.Float32frombits(v2))) -} - -func mulFloat32bits(v1, v2 uint32) uint64 { - return uint64(math.Float32bits(math.Float32frombits(v1) * math.Float32frombits(v2))) -} - -func divFloat32bits(v1, v2 uint32) uint64 { - return uint64(math.Float32bits(math.Float32frombits(v1) / math.Float32frombits(v2))) -} - -// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2 -func flt32(z1, z2 float32) bool { - if z1 != z1 || z2 != z2 { - return false - } else if z1 == z2 { - return false - } else if math.IsInf(float64(z1), 1) { - return false - } else if math.IsInf(float64(z1), -1) { - return true - } else if math.IsInf(float64(z2), 1) { - return true - } else if math.IsInf(float64(z2), -1) { - return false - } - return z1 < z2 -} - -// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/numerics.html#xref-exec-numerics-op-flt-mathrm-flt-n-z-1-z-2 -func flt64(z1, z2 float64) bool { - if z1 != z1 || z2 != z2 { - return false - } else if z1 == z2 { - return false - } else if math.IsInf(z1, 1) { - return false - } else if math.IsInf(z1, -1) { - return true - } else if math.IsInf(z2, 1) { - return true - } else if math.IsInf(z2, -1) { - return false - } - return z1 < z2 -} - -func i8RoundingAverage(v1, v2 byte) byte { - // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average - return byte((uint16(v1) + uint16(v2) + uint16(1)) / 2) -} - -func i16RoundingAverage(v1, v2 uint16) uint16 { - // https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md#lane-wise-integer-rounding-average - return uint16((uint32(v1) + uint32(v2) + 1) / 2) -} - -func i8Abs(v byte) byte { - if i := int8(v); i < 0 { - return byte(-i) - } else { - return byte(i) - } -} - -func i8MaxU(v1, v2 byte) byte { - if v1 < v2 { - return v2 - } else { - return v1 - } -} - -func i8MinU(v1, v2 byte) byte { - if v1 > v2 { - return v2 - } else { - return v1 - } -} - -func i8MaxS(v1, v2 byte) byte { - if int8(v1) < int8(v2) { - return v2 - } else { - return v1 - } -} - -func i8MinS(v1, v2 byte) byte { - if int8(v1) > int8(v2) { - return v2 - } else { - return v1 - } -} - -func i16MaxU(v1, v2 uint16) uint16 { - if v1 < v2 { - return v2 - } else { - return v1 - } -} - -func 
i16MinU(v1, v2 uint16) uint16 { - if v1 > v2 { - return v2 - } else { - return v1 - } -} - -func i16MaxS(v1, v2 uint16) uint16 { - if int16(v1) < int16(v2) { - return v2 - } else { - return v1 - } -} - -func i16MinS(v1, v2 uint16) uint16 { - if int16(v1) > int16(v2) { - return v2 - } else { - return v1 - } -} - -func i32MaxU(v1, v2 uint32) uint32 { - if v1 < v2 { - return v2 - } else { - return v1 - } -} - -func i32MinU(v1, v2 uint32) uint32 { - if v1 > v2 { - return v2 - } else { - return v1 - } -} - -func i32MaxS(v1, v2 uint32) uint32 { - if int32(v1) < int32(v2) { - return v2 - } else { - return v1 - } -} - -func i32MinS(v1, v2 uint32) uint32 { - if int32(v1) > int32(v2) { - return v2 - } else { - return v1 - } -} - -func i16Abs(v uint16) uint16 { - if i := int16(v); i < 0 { - return uint16(-i) - } else { - return uint16(i) - } -} - -func i32Abs(v uint32) uint32 { - if i := int32(v); i < 0 { - return uint32(-i) - } else { - return uint32(i) - } -} - -func (ce *callEngine) callNativeFuncWithListener(ctx context.Context, m *wasm.ModuleInstance, f *function, fnl experimental.FunctionListener) context.Context { - def, typ := f.definition(), f.funcType - - ce.stackIterator.reset(ce.stack, ce.frames, f) - fnl.Before(ctx, m, def, ce.peekValues(typ.ParamNumInUint64), &ce.stackIterator) - ce.stackIterator.clear() - ce.callNativeFunc(ctx, m, f) - fnl.After(ctx, m, def, ce.peekValues(typ.ResultNumInUint64)) - return ctx -} - -// popMemoryOffset takes a memory offset off the stack for use in load and store instructions. -// As the top of stack value is 64-bit, this ensures it is in range before returning it. -func (ce *callEngine) popMemoryOffset(op *unionOperation) uint32 { - offset := op.U2 + ce.popValue() - if offset > math.MaxUint32 { - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - } - return uint32(offset) -} - -func (ce *callEngine) callGoFuncWithStack(ctx context.Context, m *wasm.ModuleInstance, f *function) { - typ := f.funcType - paramLen := typ.ParamNumInUint64 - resultLen := typ.ResultNumInUint64 - stackLen := paramLen - - // In the interpreter engine, ce.stack may only have capacity to store - // parameters. Grow when there are more results than parameters. - if growLen := resultLen - paramLen; growLen > 0 { - for i := 0; i < growLen; i++ { - ce.stack = append(ce.stack, 0) - } - stackLen += growLen - } - - // Pass the stack elements to the go function. - stack := ce.stack[len(ce.stack)-stackLen:] - ce.callGoFunc(ctx, m, f, stack) - - // Shrink the stack when there were more parameters than results. - if shrinkLen := paramLen - resultLen; shrinkLen > 0 { - ce.stack = ce.stack[0 : len(ce.stack)-shrinkLen] - } -} - -// v128Dot performs a dot product of two 64-bit vectors. -// Note: for some reason (which I suspect is due to a bug in Go compiler's regalloc), -// inlining this function causes a bug which happens **only when** we run with -race AND arm64 AND Go 1.22. 
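-// More precisely, the four arguments are the high and low 64-bit halves of two v128 values, each
-// half packing four signed 16-bit lanes; adjacent lane products are summed in pairs into the four
-// signed 32-bit lanes of the result, matching the i32x4.dot_i16x8_s instruction.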
-func v128Dot(x1Hi, x1Lo, x2Hi, x2Lo uint64) (uint64, uint64) { - r1 := int32(int16(x1Lo>>0)) * int32(int16(x2Lo>>0)) - r2 := int32(int16(x1Lo>>16)) * int32(int16(x2Lo>>16)) - r3 := int32(int16(x1Lo>>32)) * int32(int16(x2Lo>>32)) - r4 := int32(int16(x1Lo>>48)) * int32(int16(x2Lo>>48)) - r5 := int32(int16(x1Hi>>0)) * int32(int16(x2Hi>>0)) - r6 := int32(int16(x1Hi>>16)) * int32(int16(x2Hi>>16)) - r7 := int32(int16(x1Hi>>32)) * int32(int16(x2Hi>>32)) - r8 := int32(int16(x1Hi>>48)) * int32(int16(x2Hi>>48)) - return uint64(uint32(r1+r2)) | (uint64(uint32(r3+r4)) << 32), uint64(uint32(r5+r6)) | (uint64(uint32(r7+r8)) << 32) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/operations.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/operations.go deleted file mode 100644 index 3087a718f..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/operations.go +++ /dev/null @@ -1,2812 +0,0 @@ -package interpreter - -import ( - "fmt" - "math" - "strings" -) - -// unsignedInt represents unsigned 32-bit or 64-bit integers. -type unsignedInt byte - -const ( - unsignedInt32 unsignedInt = iota - unsignedInt64 -) - -// String implements fmt.Stringer. -func (s unsignedInt) String() (ret string) { - switch s { - case unsignedInt32: - ret = "i32" - case unsignedInt64: - ret = "i64" - } - return -} - -// signedInt represents signed or unsigned integers. -type signedInt byte - -const ( - signedInt32 signedInt = iota - signedInt64 - signedUint32 - signedUint64 -) - -// String implements fmt.Stringer. -func (s signedInt) String() (ret string) { - switch s { - case signedUint32: - ret = "u32" - case signedUint64: - ret = "u64" - case signedInt32: - ret = "s32" - case signedInt64: - ret = "s64" - } - return -} - -// float represents the scalar double or single precision floating points. -type float byte - -const ( - f32 float = iota - f64 -) - -// String implements fmt.Stringer. -func (s float) String() (ret string) { - switch s { - case f32: - ret = "f32" - case f64: - ret = "f64" - } - return -} - -// unsignedType is the union of unsignedInt, float and V128 vector type. -type unsignedType byte - -const ( - unsignedTypeI32 unsignedType = iota - unsignedTypeI64 - unsignedTypeF32 - unsignedTypeF64 - unsignedTypeV128 - unsignedTypeUnknown -) - -// String implements fmt.Stringer. -func (s unsignedType) String() (ret string) { - switch s { - case unsignedTypeI32: - ret = "i32" - case unsignedTypeI64: - ret = "i64" - case unsignedTypeF32: - ret = "f32" - case unsignedTypeF64: - ret = "f64" - case unsignedTypeV128: - ret = "v128" - case unsignedTypeUnknown: - ret = "unknown" - } - return -} - -// signedType is the union of signedInt and float types. -type signedType byte - -const ( - signedTypeInt32 signedType = iota - signedTypeUint32 - signedTypeInt64 - signedTypeUint64 - signedTypeFloat32 - signedTypeFloat64 -) - -// String implements fmt.Stringer. -func (s signedType) String() (ret string) { - switch s { - case signedTypeInt32: - ret = "s32" - case signedTypeUint32: - ret = "u32" - case signedTypeInt64: - ret = "s64" - case signedTypeUint64: - ret = "u64" - case signedTypeFloat32: - ret = "f32" - case signedTypeFloat64: - ret = "f64" - } - return -} - -// operationKind is the Kind of each implementation of Operation interface. -type operationKind uint16 - -// String implements fmt.Stringer. 
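-// For example, operationKindBrIf.String() returns "BrIf" and operationKindV128Add.String() returns
-// "V128Add"; these short names are what appear whenever lowered IR is printed.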
-func (o operationKind) String() (ret string) { - switch o { - case operationKindUnreachable: - ret = "Unreachable" - case operationKindLabel: - ret = "label" - case operationKindBr: - ret = "Br" - case operationKindBrIf: - ret = "BrIf" - case operationKindBrTable: - ret = "BrTable" - case operationKindCall: - ret = "Call" - case operationKindCallIndirect: - ret = "CallIndirect" - case operationKindDrop: - ret = "Drop" - case operationKindSelect: - ret = "Select" - case operationKindPick: - ret = "Pick" - case operationKindSet: - ret = "Swap" - case operationKindGlobalGet: - ret = "GlobalGet" - case operationKindGlobalSet: - ret = "GlobalSet" - case operationKindLoad: - ret = "Load" - case operationKindLoad8: - ret = "Load8" - case operationKindLoad16: - ret = "Load16" - case operationKindLoad32: - ret = "Load32" - case operationKindStore: - ret = "Store" - case operationKindStore8: - ret = "Store8" - case operationKindStore16: - ret = "Store16" - case operationKindStore32: - ret = "Store32" - case operationKindMemorySize: - ret = "MemorySize" - case operationKindMemoryGrow: - ret = "MemoryGrow" - case operationKindConstI32: - ret = "ConstI32" - case operationKindConstI64: - ret = "ConstI64" - case operationKindConstF32: - ret = "ConstF32" - case operationKindConstF64: - ret = "ConstF64" - case operationKindEq: - ret = "Eq" - case operationKindNe: - ret = "Ne" - case operationKindEqz: - ret = "Eqz" - case operationKindLt: - ret = "Lt" - case operationKindGt: - ret = "Gt" - case operationKindLe: - ret = "Le" - case operationKindGe: - ret = "Ge" - case operationKindAdd: - ret = "Add" - case operationKindSub: - ret = "Sub" - case operationKindMul: - ret = "Mul" - case operationKindClz: - ret = "Clz" - case operationKindCtz: - ret = "Ctz" - case operationKindPopcnt: - ret = "Popcnt" - case operationKindDiv: - ret = "Div" - case operationKindRem: - ret = "Rem" - case operationKindAnd: - ret = "And" - case operationKindOr: - ret = "Or" - case operationKindXor: - ret = "Xor" - case operationKindShl: - ret = "Shl" - case operationKindShr: - ret = "Shr" - case operationKindRotl: - ret = "Rotl" - case operationKindRotr: - ret = "Rotr" - case operationKindAbs: - ret = "Abs" - case operationKindNeg: - ret = "Neg" - case operationKindCeil: - ret = "Ceil" - case operationKindFloor: - ret = "Floor" - case operationKindTrunc: - ret = "Trunc" - case operationKindNearest: - ret = "Nearest" - case operationKindSqrt: - ret = "Sqrt" - case operationKindMin: - ret = "Min" - case operationKindMax: - ret = "Max" - case operationKindCopysign: - ret = "Copysign" - case operationKindI32WrapFromI64: - ret = "I32WrapFromI64" - case operationKindITruncFromF: - ret = "ITruncFromF" - case operationKindFConvertFromI: - ret = "FConvertFromI" - case operationKindF32DemoteFromF64: - ret = "F32DemoteFromF64" - case operationKindF64PromoteFromF32: - ret = "F64PromoteFromF32" - case operationKindI32ReinterpretFromF32: - ret = "I32ReinterpretFromF32" - case operationKindI64ReinterpretFromF64: - ret = "I64ReinterpretFromF64" - case operationKindF32ReinterpretFromI32: - ret = "F32ReinterpretFromI32" - case operationKindF64ReinterpretFromI64: - ret = "F64ReinterpretFromI64" - case operationKindExtend: - ret = "Extend" - case operationKindMemoryInit: - ret = "MemoryInit" - case operationKindDataDrop: - ret = "DataDrop" - case operationKindMemoryCopy: - ret = "MemoryCopy" - case operationKindMemoryFill: - ret = "MemoryFill" - case operationKindTableInit: - ret = "TableInit" - case operationKindElemDrop: - ret = "ElemDrop" - case 
operationKindTableCopy: - ret = "TableCopy" - case operationKindRefFunc: - ret = "RefFunc" - case operationKindTableGet: - ret = "TableGet" - case operationKindTableSet: - ret = "TableSet" - case operationKindTableSize: - ret = "TableSize" - case operationKindTableGrow: - ret = "TableGrow" - case operationKindTableFill: - ret = "TableFill" - case operationKindV128Const: - ret = "ConstV128" - case operationKindV128Add: - ret = "V128Add" - case operationKindV128Sub: - ret = "V128Sub" - case operationKindV128Load: - ret = "V128Load" - case operationKindV128LoadLane: - ret = "V128LoadLane" - case operationKindV128Store: - ret = "V128Store" - case operationKindV128StoreLane: - ret = "V128StoreLane" - case operationKindV128ExtractLane: - ret = "V128ExtractLane" - case operationKindV128ReplaceLane: - ret = "V128ReplaceLane" - case operationKindV128Splat: - ret = "V128Splat" - case operationKindV128Shuffle: - ret = "V128Shuffle" - case operationKindV128Swizzle: - ret = "V128Swizzle" - case operationKindV128AnyTrue: - ret = "V128AnyTrue" - case operationKindV128AllTrue: - ret = "V128AllTrue" - case operationKindV128And: - ret = "V128And" - case operationKindV128Not: - ret = "V128Not" - case operationKindV128Or: - ret = "V128Or" - case operationKindV128Xor: - ret = "V128Xor" - case operationKindV128Bitselect: - ret = "V128Bitselect" - case operationKindV128AndNot: - ret = "V128AndNot" - case operationKindV128BitMask: - ret = "V128BitMask" - case operationKindV128Shl: - ret = "V128Shl" - case operationKindV128Shr: - ret = "V128Shr" - case operationKindV128Cmp: - ret = "V128Cmp" - case operationKindSignExtend32From8: - ret = "SignExtend32From8" - case operationKindSignExtend32From16: - ret = "SignExtend32From16" - case operationKindSignExtend64From8: - ret = "SignExtend64From8" - case operationKindSignExtend64From16: - ret = "SignExtend64From16" - case operationKindSignExtend64From32: - ret = "SignExtend64From32" - case operationKindV128AddSat: - ret = "V128AddSat" - case operationKindV128SubSat: - ret = "V128SubSat" - case operationKindV128Mul: - ret = "V128Mul" - case operationKindV128Div: - ret = "V128Div" - case operationKindV128Neg: - ret = "V128Neg" - case operationKindV128Sqrt: - ret = "V128Sqrt" - case operationKindV128Abs: - ret = "V128Abs" - case operationKindV128Popcnt: - ret = "V128Popcnt" - case operationKindV128Min: - ret = "V128Min" - case operationKindV128Max: - ret = "V128Max" - case operationKindV128AvgrU: - ret = "V128AvgrU" - case operationKindV128Ceil: - ret = "V128Ceil" - case operationKindV128Floor: - ret = "V128Floor" - case operationKindV128Trunc: - ret = "V128Trunc" - case operationKindV128Nearest: - ret = "V128Nearest" - case operationKindV128Pmin: - ret = "V128Pmin" - case operationKindV128Pmax: - ret = "V128Pmax" - case operationKindV128Extend: - ret = "V128Extend" - case operationKindV128ExtMul: - ret = "V128ExtMul" - case operationKindV128Q15mulrSatS: - ret = "V128Q15mulrSatS" - case operationKindV128ExtAddPairwise: - ret = "V128ExtAddPairwise" - case operationKindV128FloatPromote: - ret = "V128FloatPromote" - case operationKindV128FloatDemote: - ret = "V128FloatDemote" - case operationKindV128FConvertFromI: - ret = "V128FConvertFromI" - case operationKindV128Dot: - ret = "V128Dot" - case operationKindV128Narrow: - ret = "V128Narrow" - case operationKindV128ITruncSatFromF: - ret = "V128ITruncSatFromF" - case operationKindBuiltinFunctionCheckExitCode: - ret = "BuiltinFunctionCheckExitCode" - case operationKindAtomicMemoryWait: - ret = "operationKindAtomicMemoryWait" - 
case operationKindAtomicMemoryNotify: - ret = "operationKindAtomicMemoryNotify" - case operationKindAtomicFence: - ret = "operationKindAtomicFence" - case operationKindAtomicLoad: - ret = "operationKindAtomicLoad" - case operationKindAtomicLoad8: - ret = "operationKindAtomicLoad8" - case operationKindAtomicLoad16: - ret = "operationKindAtomicLoad16" - case operationKindAtomicStore: - ret = "operationKindAtomicStore" - case operationKindAtomicStore8: - ret = "operationKindAtomicStore8" - case operationKindAtomicStore16: - ret = "operationKindAtomicStore16" - case operationKindAtomicRMW: - ret = "operationKindAtomicRMW" - case operationKindAtomicRMW8: - ret = "operationKindAtomicRMW8" - case operationKindAtomicRMW16: - ret = "operationKindAtomicRMW16" - case operationKindAtomicRMWCmpxchg: - ret = "operationKindAtomicRMWCmpxchg" - case operationKindAtomicRMW8Cmpxchg: - ret = "operationKindAtomicRMW8Cmpxchg" - case operationKindAtomicRMW16Cmpxchg: - ret = "operationKindAtomicRMW16Cmpxchg" - default: - panic(fmt.Errorf("unknown operation %d", o)) - } - return -} - -const ( - // operationKindUnreachable is the Kind for NewOperationUnreachable. - operationKindUnreachable operationKind = iota - // operationKindLabel is the Kind for NewOperationLabel. - operationKindLabel - // operationKindBr is the Kind for NewOperationBr. - operationKindBr - // operationKindBrIf is the Kind for NewOperationBrIf. - operationKindBrIf - // operationKindBrTable is the Kind for NewOperationBrTable. - operationKindBrTable - // operationKindCall is the Kind for NewOperationCall. - operationKindCall - // operationKindCallIndirect is the Kind for NewOperationCallIndirect. - operationKindCallIndirect - // operationKindDrop is the Kind for NewOperationDrop. - operationKindDrop - // operationKindSelect is the Kind for NewOperationSelect. - operationKindSelect - // operationKindPick is the Kind for NewOperationPick. - operationKindPick - // operationKindSet is the Kind for NewOperationSet. - operationKindSet - // operationKindGlobalGet is the Kind for NewOperationGlobalGet. - operationKindGlobalGet - // operationKindGlobalSet is the Kind for NewOperationGlobalSet. - operationKindGlobalSet - // operationKindLoad is the Kind for NewOperationLoad. - operationKindLoad - // operationKindLoad8 is the Kind for NewOperationLoad8. - operationKindLoad8 - // operationKindLoad16 is the Kind for NewOperationLoad16. - operationKindLoad16 - // operationKindLoad32 is the Kind for NewOperationLoad32. - operationKindLoad32 - // operationKindStore is the Kind for NewOperationStore. - operationKindStore - // operationKindStore8 is the Kind for NewOperationStore8. - operationKindStore8 - // operationKindStore16 is the Kind for NewOperationStore16. - operationKindStore16 - // operationKindStore32 is the Kind for NewOperationStore32. - operationKindStore32 - // operationKindMemorySize is the Kind for NewOperationMemorySize. - operationKindMemorySize - // operationKindMemoryGrow is the Kind for NewOperationMemoryGrow. - operationKindMemoryGrow - // operationKindConstI32 is the Kind for NewOperationConstI32. - operationKindConstI32 - // operationKindConstI64 is the Kind for NewOperationConstI64. - operationKindConstI64 - // operationKindConstF32 is the Kind for NewOperationConstF32. - operationKindConstF32 - // operationKindConstF64 is the Kind for NewOperationConstF64. - operationKindConstF64 - // operationKindEq is the Kind for NewOperationEq. - operationKindEq - // operationKindNe is the Kind for NewOperationNe. 
- operationKindNe - // operationKindEqz is the Kind for NewOperationEqz. - operationKindEqz - // operationKindLt is the Kind for NewOperationLt. - operationKindLt - // operationKindGt is the Kind for NewOperationGt. - operationKindGt - // operationKindLe is the Kind for NewOperationLe. - operationKindLe - // operationKindGe is the Kind for NewOperationGe. - operationKindGe - // operationKindAdd is the Kind for NewOperationAdd. - operationKindAdd - // operationKindSub is the Kind for NewOperationSub. - operationKindSub - // operationKindMul is the Kind for NewOperationMul. - operationKindMul - // operationKindClz is the Kind for NewOperationClz. - operationKindClz - // operationKindCtz is the Kind for NewOperationCtz. - operationKindCtz - // operationKindPopcnt is the Kind for NewOperationPopcnt. - operationKindPopcnt - // operationKindDiv is the Kind for NewOperationDiv. - operationKindDiv - // operationKindRem is the Kind for NewOperationRem. - operationKindRem - // operationKindAnd is the Kind for NewOperationAnd. - operationKindAnd - // operationKindOr is the Kind for NewOperationOr. - operationKindOr - // operationKindXor is the Kind for NewOperationXor. - operationKindXor - // operationKindShl is the Kind for NewOperationShl. - operationKindShl - // operationKindShr is the Kind for NewOperationShr. - operationKindShr - // operationKindRotl is the Kind for NewOperationRotl. - operationKindRotl - // operationKindRotr is the Kind for NewOperationRotr. - operationKindRotr - // operationKindAbs is the Kind for NewOperationAbs. - operationKindAbs - // operationKindNeg is the Kind for NewOperationNeg. - operationKindNeg - // operationKindCeil is the Kind for NewOperationCeil. - operationKindCeil - // operationKindFloor is the Kind for NewOperationFloor. - operationKindFloor - // operationKindTrunc is the Kind for NewOperationTrunc. - operationKindTrunc - // operationKindNearest is the Kind for NewOperationNearest. - operationKindNearest - // operationKindSqrt is the Kind for NewOperationSqrt. - operationKindSqrt - // operationKindMin is the Kind for NewOperationMin. - operationKindMin - // operationKindMax is the Kind for NewOperationMax. - operationKindMax - // operationKindCopysign is the Kind for NewOperationCopysign. - operationKindCopysign - // operationKindI32WrapFromI64 is the Kind for NewOperationI32WrapFromI64. - operationKindI32WrapFromI64 - // operationKindITruncFromF is the Kind for NewOperationITruncFromF. - operationKindITruncFromF - // operationKindFConvertFromI is the Kind for NewOperationFConvertFromI. - operationKindFConvertFromI - // operationKindF32DemoteFromF64 is the Kind for NewOperationF32DemoteFromF64. - operationKindF32DemoteFromF64 - // operationKindF64PromoteFromF32 is the Kind for NewOperationF64PromoteFromF32. - operationKindF64PromoteFromF32 - // operationKindI32ReinterpretFromF32 is the Kind for NewOperationI32ReinterpretFromF32. - operationKindI32ReinterpretFromF32 - // operationKindI64ReinterpretFromF64 is the Kind for NewOperationI64ReinterpretFromF64. - operationKindI64ReinterpretFromF64 - // operationKindF32ReinterpretFromI32 is the Kind for NewOperationF32ReinterpretFromI32. - operationKindF32ReinterpretFromI32 - // operationKindF64ReinterpretFromI64 is the Kind for NewOperationF64ReinterpretFromI64. - operationKindF64ReinterpretFromI64 - // operationKindExtend is the Kind for NewOperationExtend. - operationKindExtend - // operationKindSignExtend32From8 is the Kind for NewOperationSignExtend32From8. 
- operationKindSignExtend32From8 - // operationKindSignExtend32From16 is the Kind for NewOperationSignExtend32From16. - operationKindSignExtend32From16 - // operationKindSignExtend64From8 is the Kind for NewOperationSignExtend64From8. - operationKindSignExtend64From8 - // operationKindSignExtend64From16 is the Kind for NewOperationSignExtend64From16. - operationKindSignExtend64From16 - // operationKindSignExtend64From32 is the Kind for NewOperationSignExtend64From32. - operationKindSignExtend64From32 - // operationKindMemoryInit is the Kind for NewOperationMemoryInit. - operationKindMemoryInit - // operationKindDataDrop is the Kind for NewOperationDataDrop. - operationKindDataDrop - // operationKindMemoryCopy is the Kind for NewOperationMemoryCopy. - operationKindMemoryCopy - // operationKindMemoryFill is the Kind for NewOperationMemoryFill. - operationKindMemoryFill - // operationKindTableInit is the Kind for NewOperationTableInit. - operationKindTableInit - // operationKindElemDrop is the Kind for NewOperationElemDrop. - operationKindElemDrop - // operationKindTableCopy is the Kind for NewOperationTableCopy. - operationKindTableCopy - // operationKindRefFunc is the Kind for NewOperationRefFunc. - operationKindRefFunc - // operationKindTableGet is the Kind for NewOperationTableGet. - operationKindTableGet - // operationKindTableSet is the Kind for NewOperationTableSet. - operationKindTableSet - // operationKindTableSize is the Kind for NewOperationTableSize. - operationKindTableSize - // operationKindTableGrow is the Kind for NewOperationTableGrow. - operationKindTableGrow - // operationKindTableFill is the Kind for NewOperationTableFill. - operationKindTableFill - - // Vector value related instructions are prefixed by V128. - - // operationKindV128Const is the Kind for NewOperationV128Const. - operationKindV128Const - // operationKindV128Add is the Kind for NewOperationV128Add. - operationKindV128Add - // operationKindV128Sub is the Kind for NewOperationV128Sub. - operationKindV128Sub - // operationKindV128Load is the Kind for NewOperationV128Load. - operationKindV128Load - // operationKindV128LoadLane is the Kind for NewOperationV128LoadLane. - operationKindV128LoadLane - // operationKindV128Store is the Kind for NewOperationV128Store. - operationKindV128Store - // operationKindV128StoreLane is the Kind for NewOperationV128StoreLane. - operationKindV128StoreLane - // operationKindV128ExtractLane is the Kind for NewOperationV128ExtractLane. - operationKindV128ExtractLane - // operationKindV128ReplaceLane is the Kind for NewOperationV128ReplaceLane. - operationKindV128ReplaceLane - // operationKindV128Splat is the Kind for NewOperationV128Splat. - operationKindV128Splat - // operationKindV128Shuffle is the Kind for NewOperationV128Shuffle. - operationKindV128Shuffle - // operationKindV128Swizzle is the Kind for NewOperationV128Swizzle. - operationKindV128Swizzle - // operationKindV128AnyTrue is the Kind for NewOperationV128AnyTrue. - operationKindV128AnyTrue - // operationKindV128AllTrue is the Kind for NewOperationV128AllTrue. - operationKindV128AllTrue - // operationKindV128BitMask is the Kind for NewOperationV128BitMask. - operationKindV128BitMask - // operationKindV128And is the Kind for NewOperationV128And. - operationKindV128And - // operationKindV128Not is the Kind for NewOperationV128Not. - operationKindV128Not - // operationKindV128Or is the Kind for NewOperationV128Or. - operationKindV128Or - // operationKindV128Xor is the Kind for NewOperationV128Xor. 
- operationKindV128Xor - // operationKindV128Bitselect is the Kind for NewOperationV128Bitselect. - operationKindV128Bitselect - // operationKindV128AndNot is the Kind for NewOperationV128AndNot. - operationKindV128AndNot - // operationKindV128Shl is the Kind for NewOperationV128Shl. - operationKindV128Shl - // operationKindV128Shr is the Kind for NewOperationV128Shr. - operationKindV128Shr - // operationKindV128Cmp is the Kind for NewOperationV128Cmp. - operationKindV128Cmp - // operationKindV128AddSat is the Kind for NewOperationV128AddSat. - operationKindV128AddSat - // operationKindV128SubSat is the Kind for NewOperationV128SubSat. - operationKindV128SubSat - // operationKindV128Mul is the Kind for NewOperationV128Mul. - operationKindV128Mul - // operationKindV128Div is the Kind for NewOperationV128Div. - operationKindV128Div - // operationKindV128Neg is the Kind for NewOperationV128Neg. - operationKindV128Neg - // operationKindV128Sqrt is the Kind for NewOperationV128Sqrt. - operationKindV128Sqrt - // operationKindV128Abs is the Kind for NewOperationV128Abs. - operationKindV128Abs - // operationKindV128Popcnt is the Kind for NewOperationV128Popcnt. - operationKindV128Popcnt - // operationKindV128Min is the Kind for NewOperationV128Min. - operationKindV128Min - // operationKindV128Max is the Kind for NewOperationV128Max. - operationKindV128Max - // operationKindV128AvgrU is the Kind for NewOperationV128AvgrU. - operationKindV128AvgrU - // operationKindV128Pmin is the Kind for NewOperationV128Pmin. - operationKindV128Pmin - // operationKindV128Pmax is the Kind for NewOperationV128Pmax. - operationKindV128Pmax - // operationKindV128Ceil is the Kind for NewOperationV128Ceil. - operationKindV128Ceil - // operationKindV128Floor is the Kind for NewOperationV128Floor. - operationKindV128Floor - // operationKindV128Trunc is the Kind for NewOperationV128Trunc. - operationKindV128Trunc - // operationKindV128Nearest is the Kind for NewOperationV128Nearest. - operationKindV128Nearest - // operationKindV128Extend is the Kind for NewOperationV128Extend. - operationKindV128Extend - // operationKindV128ExtMul is the Kind for NewOperationV128ExtMul. - operationKindV128ExtMul - // operationKindV128Q15mulrSatS is the Kind for NewOperationV128Q15mulrSatS. - operationKindV128Q15mulrSatS - // operationKindV128ExtAddPairwise is the Kind for NewOperationV128ExtAddPairwise. - operationKindV128ExtAddPairwise - // operationKindV128FloatPromote is the Kind for NewOperationV128FloatPromote. - operationKindV128FloatPromote - // operationKindV128FloatDemote is the Kind for NewOperationV128FloatDemote. - operationKindV128FloatDemote - // operationKindV128FConvertFromI is the Kind for NewOperationV128FConvertFromI. - operationKindV128FConvertFromI - // operationKindV128Dot is the Kind for NewOperationV128Dot. - operationKindV128Dot - // operationKindV128Narrow is the Kind for NewOperationV128Narrow. - operationKindV128Narrow - // operationKindV128ITruncSatFromF is the Kind for NewOperationV128ITruncSatFromF. - operationKindV128ITruncSatFromF - - // operationKindBuiltinFunctionCheckExitCode is the Kind for NewOperationBuiltinFunctionCheckExitCode. - operationKindBuiltinFunctionCheckExitCode - - // operationKindAtomicMemoryWait is the kind for NewOperationAtomicMemoryWait. - operationKindAtomicMemoryWait - // operationKindAtomicMemoryNotify is the kind for NewOperationAtomicMemoryNotify. - operationKindAtomicMemoryNotify - // operationKindAtomicFence is the kind for NewOperationAtomicFence. 
- operationKindAtomicFence - // operationKindAtomicLoad is the kind for NewOperationAtomicLoad. - operationKindAtomicLoad - // operationKindAtomicLoad8 is the kind for NewOperationAtomicLoad8. - operationKindAtomicLoad8 - // operationKindAtomicLoad16 is the kind for NewOperationAtomicLoad16. - operationKindAtomicLoad16 - // operationKindAtomicStore is the kind for NewOperationAtomicStore. - operationKindAtomicStore - // operationKindAtomicStore8 is the kind for NewOperationAtomicStore8. - operationKindAtomicStore8 - // operationKindAtomicStore16 is the kind for NewOperationAtomicStore16. - operationKindAtomicStore16 - - // operationKindAtomicRMW is the kind for NewOperationAtomicRMW. - operationKindAtomicRMW - // operationKindAtomicRMW8 is the kind for NewOperationAtomicRMW8. - operationKindAtomicRMW8 - // operationKindAtomicRMW16 is the kind for NewOperationAtomicRMW16. - operationKindAtomicRMW16 - - // operationKindAtomicRMWCmpxchg is the kind for NewOperationAtomicRMWCmpxchg. - operationKindAtomicRMWCmpxchg - // operationKindAtomicRMW8Cmpxchg is the kind for NewOperationAtomicRMW8Cmpxchg. - operationKindAtomicRMW8Cmpxchg - // operationKindAtomicRMW16Cmpxchg is the kind for NewOperationAtomicRMW16Cmpxchg. - operationKindAtomicRMW16Cmpxchg - - // operationKindEnd is always placed at the bottom of this iota definition to be used in the test. - operationKindEnd -) - -// NewOperationBuiltinFunctionCheckExitCode is a constructor for unionOperation with Kind operationKindBuiltinFunctionCheckExitCode. -// -// OperationBuiltinFunctionCheckExitCode corresponds to the instruction to check the api.Module is already closed due to -// context.DeadlineExceeded, context.Canceled, or the explicit call of CloseWithExitCode on api.Module. -func newOperationBuiltinFunctionCheckExitCode() unionOperation { - return unionOperation{Kind: operationKindBuiltinFunctionCheckExitCode} -} - -// label is the unique identifier for each block in a single function in interpreterir -// where "block" consists of multiple operations, and must End with branching operations -// (e.g. operationKindBr or operationKindBrIf). -type label uint64 - -// Kind returns the labelKind encoded in this label. -func (l label) Kind() labelKind { - return labelKind(uint32(l)) -} - -// FrameID returns the frame id encoded in this label. -func (l label) FrameID() int { - return int(uint32(l >> 32)) -} - -// NewLabel is a constructor for a label. -func newLabel(kind labelKind, frameID uint32) label { - return label(kind) | label(frameID)<<32 -} - -// String implements fmt.Stringer. -func (l label) String() (ret string) { - frameID := l.FrameID() - switch l.Kind() { - case labelKindHeader: - ret = fmt.Sprintf(".L%d", frameID) - case labelKindElse: - ret = fmt.Sprintf(".L%d_else", frameID) - case labelKindContinuation: - ret = fmt.Sprintf(".L%d_cont", frameID) - case labelKindReturn: - return ".return" - } - return -} - -func (l label) IsReturnTarget() bool { - return l.Kind() == labelKindReturn -} - -// labelKind is the Kind of the label. -type labelKind = byte - -const ( - // labelKindHeader is the header for various blocks. For example, the "then" block of - // wasm.OpcodeIfName in Wasm has the label of this Kind. - labelKindHeader labelKind = iota - // labelKindElse is the Kind of label for "else" block of wasm.OpcodeIfName in Wasm. - labelKindElse - // labelKindContinuation is the Kind of label which is the continuation of blocks. - // For example, for wasm text like - // (func - // .... 
- // (if (local.get 0) (then (nop)) (else (nop))) - // return - // ) - // we have the continuation block (of if-block) corresponding to "return" opcode. - labelKindContinuation - labelKindReturn - labelKindNum -) - -// unionOperation implements Operation and is the compilation (engine.lowerIR) result of a interpreterir.Operation. -// -// Not all operations result in a unionOperation, e.g. interpreterir.OperationI32ReinterpretFromF32, and some operations are -// more complex than others, e.g. interpreterir.NewOperationBrTable. -// -// Note: This is a form of union type as it can store fields needed for any operation. Hence, most fields are opaque and -// only relevant when in context of its kind. -type unionOperation struct { - // Kind determines how to interpret the other fields in this struct. - Kind operationKind - B1, B2 byte - B3 bool - U1, U2 uint64 - U3 uint64 - Us []uint64 -} - -// String implements fmt.Stringer. -func (o unionOperation) String() string { - switch o.Kind { - case operationKindUnreachable, - operationKindSelect, - operationKindMemorySize, - operationKindMemoryGrow, - operationKindI32WrapFromI64, - operationKindF32DemoteFromF64, - operationKindF64PromoteFromF32, - operationKindI32ReinterpretFromF32, - operationKindI64ReinterpretFromF64, - operationKindF32ReinterpretFromI32, - operationKindF64ReinterpretFromI64, - operationKindSignExtend32From8, - operationKindSignExtend32From16, - operationKindSignExtend64From8, - operationKindSignExtend64From16, - operationKindSignExtend64From32, - operationKindMemoryInit, - operationKindDataDrop, - operationKindMemoryCopy, - operationKindMemoryFill, - operationKindTableInit, - operationKindElemDrop, - operationKindTableCopy, - operationKindRefFunc, - operationKindTableGet, - operationKindTableSet, - operationKindTableSize, - operationKindTableGrow, - operationKindTableFill, - operationKindBuiltinFunctionCheckExitCode: - return o.Kind.String() - - case operationKindCall, - operationKindGlobalGet, - operationKindGlobalSet: - return fmt.Sprintf("%s %d", o.Kind, o.B1) - - case operationKindLabel: - return label(o.U1).String() - - case operationKindBr: - return fmt.Sprintf("%s %s", o.Kind, label(o.U1).String()) - - case operationKindBrIf: - thenTarget := label(o.U1) - elseTarget := label(o.U2) - return fmt.Sprintf("%s %s, %s", o.Kind, thenTarget, elseTarget) - - case operationKindBrTable: - var targets []string - var defaultLabel label - if len(o.Us) > 0 { - targets = make([]string, len(o.Us)-1) - for i, t := range o.Us[1:] { - targets[i] = label(t).String() - } - defaultLabel = label(o.Us[0]) - } - return fmt.Sprintf("%s [%s] %s", o.Kind, strings.Join(targets, ","), defaultLabel) - - case operationKindCallIndirect: - return fmt.Sprintf("%s: type=%d, table=%d", o.Kind, o.U1, o.U2) - - case operationKindDrop: - start := int64(o.U1) - end := int64(o.U2) - return fmt.Sprintf("%s %d..%d", o.Kind, start, end) - - case operationKindPick, operationKindSet: - return fmt.Sprintf("%s %d (is_vector=%v)", o.Kind, o.U1, o.B3) - - case operationKindLoad, operationKindStore: - return fmt.Sprintf("%s.%s (align=%d, offset=%d)", unsignedType(o.B1), o.Kind, o.U1, o.U2) - - case operationKindLoad8, - operationKindLoad16: - return fmt.Sprintf("%s.%s (align=%d, offset=%d)", signedType(o.B1), o.Kind, o.U1, o.U2) - - case operationKindStore8, - operationKindStore16, - operationKindStore32: - return fmt.Sprintf("%s (align=%d, offset=%d)", o.Kind, o.U1, o.U2) - - case operationKindLoad32: - var t string - if o.B1 == 1 { - t = "i64" - } else { - t = "u64" - } - 
return fmt.Sprintf("%s.%s (align=%d, offset=%d)", t, o.Kind, o.U1, o.U2) - - case operationKindEq, - operationKindNe, - operationKindAdd, - operationKindSub, - operationKindMul: - return fmt.Sprintf("%s.%s", unsignedType(o.B1), o.Kind) - - case operationKindEqz, - operationKindClz, - operationKindCtz, - operationKindPopcnt, - operationKindAnd, - operationKindOr, - operationKindXor, - operationKindShl, - operationKindRotl, - operationKindRotr: - return fmt.Sprintf("%s.%s", unsignedInt(o.B1), o.Kind) - - case operationKindRem, operationKindShr: - return fmt.Sprintf("%s.%s", signedInt(o.B1), o.Kind) - - case operationKindLt, - operationKindGt, - operationKindLe, - operationKindGe, - operationKindDiv: - return fmt.Sprintf("%s.%s", signedType(o.B1), o.Kind) - - case operationKindAbs, - operationKindNeg, - operationKindCeil, - operationKindFloor, - operationKindTrunc, - operationKindNearest, - operationKindSqrt, - operationKindMin, - operationKindMax, - operationKindCopysign: - return fmt.Sprintf("%s.%s", float(o.B1), o.Kind) - - case operationKindConstI32, - operationKindConstI64: - return fmt.Sprintf("%s %#x", o.Kind, o.U1) - - case operationKindConstF32: - return fmt.Sprintf("%s %f", o.Kind, math.Float32frombits(uint32(o.U1))) - case operationKindConstF64: - return fmt.Sprintf("%s %f", o.Kind, math.Float64frombits(o.U1)) - - case operationKindITruncFromF: - return fmt.Sprintf("%s.%s.%s (non_trapping=%v)", signedInt(o.B2), o.Kind, float(o.B1), o.B3) - case operationKindFConvertFromI: - return fmt.Sprintf("%s.%s.%s", float(o.B2), o.Kind, signedInt(o.B1)) - case operationKindExtend: - var in, out string - if o.B3 { - in = "i32" - out = "i64" - } else { - in = "u32" - out = "u64" - } - return fmt.Sprintf("%s.%s.%s", out, o.Kind, in) - - case operationKindV128Const: - return fmt.Sprintf("%s [%#x, %#x]", o.Kind, o.U1, o.U2) - case operationKindV128Add, - operationKindV128Sub: - return fmt.Sprintf("%s (shape=%s)", o.Kind, shapeName(o.B1)) - case operationKindV128Load, - operationKindV128LoadLane, - operationKindV128Store, - operationKindV128StoreLane, - operationKindV128ExtractLane, - operationKindV128ReplaceLane, - operationKindV128Splat, - operationKindV128Shuffle, - operationKindV128Swizzle, - operationKindV128AnyTrue, - operationKindV128AllTrue, - operationKindV128BitMask, - operationKindV128And, - operationKindV128Not, - operationKindV128Or, - operationKindV128Xor, - operationKindV128Bitselect, - operationKindV128AndNot, - operationKindV128Shl, - operationKindV128Shr, - operationKindV128Cmp, - operationKindV128AddSat, - operationKindV128SubSat, - operationKindV128Mul, - operationKindV128Div, - operationKindV128Neg, - operationKindV128Sqrt, - operationKindV128Abs, - operationKindV128Popcnt, - operationKindV128Min, - operationKindV128Max, - operationKindV128AvgrU, - operationKindV128Pmin, - operationKindV128Pmax, - operationKindV128Ceil, - operationKindV128Floor, - operationKindV128Trunc, - operationKindV128Nearest, - operationKindV128Extend, - operationKindV128ExtMul, - operationKindV128Q15mulrSatS, - operationKindV128ExtAddPairwise, - operationKindV128FloatPromote, - operationKindV128FloatDemote, - operationKindV128FConvertFromI, - operationKindV128Dot, - operationKindV128Narrow: - return o.Kind.String() - - case operationKindV128ITruncSatFromF: - if o.B3 { - return fmt.Sprintf("%s.%sS", o.Kind, shapeName(o.B1)) - } else { - return fmt.Sprintf("%s.%sU", o.Kind, shapeName(o.B1)) - } - - case operationKindAtomicMemoryWait, - operationKindAtomicMemoryNotify, - operationKindAtomicFence, - 
operationKindAtomicLoad, - operationKindAtomicLoad8, - operationKindAtomicLoad16, - operationKindAtomicStore, - operationKindAtomicStore8, - operationKindAtomicStore16, - operationKindAtomicRMW, - operationKindAtomicRMW8, - operationKindAtomicRMW16, - operationKindAtomicRMWCmpxchg, - operationKindAtomicRMW8Cmpxchg, - operationKindAtomicRMW16Cmpxchg: - return o.Kind.String() - - default: - panic(fmt.Sprintf("TODO: %v", o.Kind)) - } -} - -// NewOperationUnreachable is a constructor for unionOperation with operationKindUnreachable -// -// This corresponds to wasm.OpcodeUnreachable. -// -// The engines are expected to exit the execution with wasmruntime.ErrRuntimeUnreachable error. -func newOperationUnreachable() unionOperation { - return unionOperation{Kind: operationKindUnreachable} -} - -// NewOperationLabel is a constructor for unionOperation with operationKindLabel. -// -// This is used to inform the engines of the beginning of a label. -func newOperationLabel(label label) unionOperation { - return unionOperation{Kind: operationKindLabel, U1: uint64(label)} -} - -// NewOperationBr is a constructor for unionOperation with operationKindBr. -// -// The engines are expected to branch into U1 label. -func newOperationBr(target label) unionOperation { - return unionOperation{Kind: operationKindBr, U1: uint64(target)} -} - -// NewOperationBrIf is a constructor for unionOperation with operationKindBrIf. -// -// The engines are expected to pop a value and branch into U1 label if the value equals 1. -// Otherwise, the code branches into U2 label. -func newOperationBrIf(thenTarget, elseTarget label, thenDrop inclusiveRange) unionOperation { - return unionOperation{ - Kind: operationKindBrIf, - U1: uint64(thenTarget), - U2: uint64(elseTarget), - U3: thenDrop.AsU64(), - } -} - -// NewOperationBrTable is a constructor for unionOperation with operationKindBrTable. -// -// This corresponds to wasm.OpcodeBrTableName except that the label -// here means the interpreterir level, not the ones of Wasm. -// -// The engines are expected to do the br_table operation based on the default (Us[len(Us)-1], Us[len(Us)-2]) and -// targets (Us[:len(Us)-1], Rs[:len(Us)-1]). More precisely, this pops a value from the stack (called "index") -// and decides which branch we go into next based on the value. -// -// For example, assume we have operations like {default: L_DEFAULT, targets: [L0, L1, L2]}. -// If "index" >= len(defaults), then branch into the L_DEFAULT label. -// Otherwise, we enter label of targets[index]. -func newOperationBrTable(targetLabelsAndRanges []uint64) unionOperation { - return unionOperation{ - Kind: operationKindBrTable, - Us: targetLabelsAndRanges, - } -} - -// NewOperationCall is a constructor for unionOperation with operationKindCall. -// -// This corresponds to wasm.OpcodeCallName, and engines are expected to -// enter into a function whose index equals OperationCall.FunctionIndex. -func newOperationCall(functionIndex uint32) unionOperation { - return unionOperation{Kind: operationKindCall, U1: uint64(functionIndex)} -} - -// NewOperationCallIndirect implements Operation. -// -// This corresponds to wasm.OpcodeCallIndirectName, and engines are expected to -// consume the one value from the top of stack (called "offset"), -// and make a function call against the function whose function address equals -// Tables[OperationCallIndirect.TableIndex][offset]. 
-// -// Note: This is called indirect function call in the sense that the target function is indirectly -// determined by the current state (top value) of the stack. -// Therefore, two checks are performed at runtime before entering the target function: -// 1) whether "offset" exceeds the length of table Tables[OperationCallIndirect.TableIndex]. -// 2) whether the type of the function table[offset] matches the function type specified by OperationCallIndirect.TypeIndex. -func newOperationCallIndirect(typeIndex, tableIndex uint32) unionOperation { - return unionOperation{Kind: operationKindCallIndirect, U1: uint64(typeIndex), U2: uint64(tableIndex)} -} - -// inclusiveRange is the range which spans across the value stack starting from the top to the bottom, and -// both boundary are included in the range. -type inclusiveRange struct { - Start, End int32 -} - -// AsU64 is be used to convert inclusiveRange to uint64 so that it can be stored in unionOperation. -func (i inclusiveRange) AsU64() uint64 { - return uint64(uint32(i.Start))<<32 | uint64(uint32(i.End)) -} - -// inclusiveRangeFromU64 retrieves inclusiveRange from the given uint64 which is stored in unionOperation. -func inclusiveRangeFromU64(v uint64) inclusiveRange { - return inclusiveRange{ - Start: int32(uint32(v >> 32)), - End: int32(uint32(v)), - } -} - -// nopinclusiveRange is inclusiveRange which corresponds to no-operation. -var nopinclusiveRange = inclusiveRange{Start: -1, End: -1} - -// NewOperationDrop is a constructor for unionOperation with operationKindDrop. -// -// The engines are expected to discard the values selected by NewOperationDrop.Depth which -// starts from the top of the stack to the bottom. -// -// depth spans across the uint64 value stack at runtime to be dropped by this operation. -func newOperationDrop(depth inclusiveRange) unionOperation { - return unionOperation{Kind: operationKindDrop, U1: depth.AsU64()} -} - -// NewOperationSelect is a constructor for unionOperation with operationKindSelect. -// -// This corresponds to wasm.OpcodeSelect. -// -// The engines are expected to pop three values, say [..., x2, x1, c], then if the value "c" equals zero, -// "x1" is pushed back onto the stack and, otherwise "x2" is pushed back. -// -// isTargetVector true if the selection target value's type is wasm.ValueTypeV128. -func newOperationSelect(isTargetVector bool) unionOperation { - return unionOperation{Kind: operationKindSelect, B3: isTargetVector} -} - -// NewOperationPick is a constructor for unionOperation with operationKindPick. -// -// The engines are expected to copy a value pointed by depth, and push the -// copied value onto the top of the stack. -// -// depth is the location of the pick target in the uint64 value stack at runtime. -// If isTargetVector=true, this points to the location of the lower 64-bits of the vector. -func newOperationPick(depth int, isTargetVector bool) unionOperation { - return unionOperation{Kind: operationKindPick, U1: uint64(depth), B3: isTargetVector} -} - -// NewOperationSet is a constructor for unionOperation with operationKindSet. -// -// The engines are expected to set the top value of the stack to the location specified by -// depth. -// -// depth is the location of the set target in the uint64 value stack at runtime. -// If isTargetVector=true, this points the location of the lower 64-bits of the vector. 
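A quick round-trip sketch of the inclusiveRange packing defined above (illustrative values only, assuming the helpers as declared earlier):

	r := inclusiveRange{Start: 1, End: 3}
	u := r.AsU64()                // 0x0000000100000003: Start occupies the high 32 bits, End the low 32 bits.
	_ = inclusiveRangeFromU64(u)  // Recovers {Start: 1, End: 3}.
	_ = nopinclusiveRange.AsU64() // 0xffffffffffffffff, since the no-op range is {Start: -1, End: -1}.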
-func newOperationSet(depth int, isTargetVector bool) unionOperation { - return unionOperation{Kind: operationKindSet, U1: uint64(depth), B3: isTargetVector} -} - -// NewOperationGlobalGet is a constructor for unionOperation with operationKindGlobalGet. -// -// The engines are expected to read the global value specified by OperationGlobalGet.Index, -// and push the copy of the value onto the stack. -// -// See wasm.OpcodeGlobalGet. -func newOperationGlobalGet(index uint32) unionOperation { - return unionOperation{Kind: operationKindGlobalGet, U1: uint64(index)} -} - -// NewOperationGlobalSet is a constructor for unionOperation with operationKindGlobalSet. -// -// The engines are expected to consume the value from the top of the stack, -// and write the value into the global specified by OperationGlobalSet.Index. -// -// See wasm.OpcodeGlobalSet. -func newOperationGlobalSet(index uint32) unionOperation { - return unionOperation{Kind: operationKindGlobalSet, U1: uint64(index)} -} - -// memoryArg is the "memarg" to all memory instructions. -// -// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#memory-instructions%E2%91%A0 -type memoryArg struct { - // Alignment the expected alignment (expressed as the exponent of a power of 2). Default to the natural alignment. - // - // "Natural alignment" is defined here as the smallest power of two that can hold the size of the value type. Ex - // wasm.ValueTypeI64 is encoded in 8 little-endian bytes. 2^3 = 8, so the natural alignment is three. - Alignment uint32 - - // Offset is the address offset added to the instruction's dynamic address operand, yielding a 33-bit effective - // address that is the zero-based index at which the memory is accessed. Default to zero. - Offset uint32 -} - -// NewOperationLoad is a constructor for unionOperation with operationKindLoad. -// -// This corresponds to wasm.OpcodeI32LoadName wasm.OpcodeI64LoadName wasm.OpcodeF32LoadName and wasm.OpcodeF64LoadName. -// -// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, -// otherwise load the corresponding value following the semantics of the corresponding WebAssembly instruction. -func newOperationLoad(unsignedType unsignedType, arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindLoad, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationLoad8 is a constructor for unionOperation with operationKindLoad8. -// -// This corresponds to wasm.OpcodeI32Load8SName wasm.OpcodeI32Load8UName wasm.OpcodeI64Load8SName wasm.OpcodeI64Load8UName. -// -// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, -// otherwise load the corresponding value following the semantics of the corresponding WebAssembly instruction. -func newOperationLoad8(signedInt signedInt, arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindLoad8, B1: byte(signedInt), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationLoad16 is a constructor for unionOperation with operationKindLoad16. -// -// This corresponds to wasm.OpcodeI32Load16SName wasm.OpcodeI32Load16UName wasm.OpcodeI64Load16SName wasm.OpcodeI64Load16UName. -// -// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, -// otherwise load the corresponding value following the semantics of the corresponding WebAssembly instruction. 
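To make the memoryArg fields described above concrete, here is an illustrative lowering of a plain i32 load with natural alignment (2^2 = 4 bytes) and a static offset of 8; the values are hypothetical:

	arg := memoryArg{Alignment: 2, Offset: 8} // Alignment is the exponent: 2^2 = 4, the natural alignment of i32.
	op := newOperationLoad(unsignedTypeI32, arg)
	_ = op // op.U1 carries the alignment exponent and op.U2 the static offset.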
-func newOperationLoad16(signedInt signedInt, arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindLoad16, B1: byte(signedInt), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationLoad32 is a constructor for unionOperation with operationKindLoad32. -// -// This corresponds to wasm.OpcodeI64Load32SName wasm.OpcodeI64Load32UName. -// -// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, -// otherwise load the corresponding value following the semantics of the corresponding WebAssembly instruction. -func newOperationLoad32(signed bool, arg memoryArg) unionOperation { - sigB := byte(0) - if signed { - sigB = 1 - } - return unionOperation{Kind: operationKindLoad32, B1: sigB, U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationStore is a constructor for unionOperation with operationKindStore. -// -// # This corresponds to wasm.OpcodeI32StoreName wasm.OpcodeI64StoreName wasm.OpcodeF32StoreName wasm.OpcodeF64StoreName -// -// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, -// otherwise store the corresponding value following the semantics of the corresponding WebAssembly instruction. -func newOperationStore(unsignedType unsignedType, arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindStore, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationStore8 is a constructor for unionOperation with operationKindStore8. -// -// # This corresponds to wasm.OpcodeI32Store8Name wasm.OpcodeI64Store8Name -// -// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, -// otherwise store the corresponding value following the semantics of the corresponding WebAssembly instruction. -func newOperationStore8(arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindStore8, U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationStore16 is a constructor for unionOperation with operationKindStore16. -// -// # This corresponds to wasm.OpcodeI32Store16Name wasm.OpcodeI64Store16Name -// -// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, -// otherwise store the corresponding value following the semantics of the corresponding WebAssembly instruction. -func newOperationStore16(arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindStore16, U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationStore32 is a constructor for unionOperation with operationKindStore32. -// -// # This corresponds to wasm.OpcodeI64Store32Name -// -// The engines are expected to check the boundary of memory length, and exit the execution if this exceeds the boundary, -// otherwise store the corresponding value following the semantics of the corresponding WebAssembly instruction. -func newOperationStore32(arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindStore32, U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationMemorySize is a constructor for unionOperation with operationKindMemorySize. -// -// This corresponds to wasm.OpcodeMemorySize. -// -// The engines are expected to push the current page size of the memory onto the stack. 
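-// For example, a memory that currently spans two 64 KiB pages pushes 2 (the page count, not the
-// byte size), so the stack becomes [..., 2].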
-func newOperationMemorySize() unionOperation { - return unionOperation{Kind: operationKindMemorySize} -} - -// NewOperationMemoryGrow is a constructor for unionOperation with operationKindMemoryGrow. -// -// This corresponds to wasm.OpcodeMemoryGrow. -// -// The engines are expected to pop one value from the top of the stack, then -// execute wasm.MemoryInstance Grow with the value, and push the previous -// page size of the memory onto the stack. -func newOperationMemoryGrow() unionOperation { - return unionOperation{Kind: operationKindMemoryGrow} -} - -// NewOperationConstI32 is a constructor for unionOperation with OperationConstI32. -// -// This corresponds to wasm.OpcodeI32Const. -func newOperationConstI32(value uint32) unionOperation { - return unionOperation{Kind: operationKindConstI32, U1: uint64(value)} -} - -// NewOperationConstI64 is a constructor for unionOperation with OperationConstI64. -// -// This corresponds to wasm.OpcodeI64Const. -func newOperationConstI64(value uint64) unionOperation { - return unionOperation{Kind: operationKindConstI64, U1: value} -} - -// NewOperationConstF32 is a constructor for unionOperation with OperationConstF32. -// -// This corresponds to wasm.OpcodeF32Const. -func newOperationConstF32(value float32) unionOperation { - return unionOperation{Kind: operationKindConstF32, U1: uint64(math.Float32bits(value))} -} - -// NewOperationConstF64 is a constructor for unionOperation with OperationConstF64. -// -// This corresponds to wasm.OpcodeF64Const. -func newOperationConstF64(value float64) unionOperation { - return unionOperation{Kind: operationKindConstF64, U1: math.Float64bits(value)} -} - -// NewOperationEq is a constructor for unionOperation with operationKindEq. -// -// This corresponds to wasm.OpcodeI32EqName wasm.OpcodeI64EqName wasm.OpcodeF32EqName wasm.OpcodeF64EqName -func newOperationEq(b unsignedType) unionOperation { - return unionOperation{Kind: operationKindEq, B1: byte(b)} -} - -// NewOperationNe is a constructor for unionOperation with operationKindNe. -// -// This corresponds to wasm.OpcodeI32NeName wasm.OpcodeI64NeName wasm.OpcodeF32NeName wasm.OpcodeF64NeName -func newOperationNe(b unsignedType) unionOperation { - return unionOperation{Kind: operationKindNe, B1: byte(b)} -} - -// NewOperationEqz is a constructor for unionOperation with operationKindEqz. -// -// This corresponds to wasm.OpcodeI32EqzName wasm.OpcodeI64EqzName -func newOperationEqz(b unsignedInt) unionOperation { - return unionOperation{Kind: operationKindEqz, B1: byte(b)} -} - -// NewOperationLt is a constructor for unionOperation with operationKindLt. -// -// This corresponds to wasm.OpcodeI32LtS wasm.OpcodeI32LtU wasm.OpcodeI64LtS wasm.OpcodeI64LtU wasm.OpcodeF32Lt wasm.OpcodeF64Lt -func newOperationLt(b signedType) unionOperation { - return unionOperation{Kind: operationKindLt, B1: byte(b)} -} - -// NewOperationGt is a constructor for unionOperation with operationKindGt. -// -// This corresponds to wasm.OpcodeI32GtS wasm.OpcodeI32GtU wasm.OpcodeI64GtS wasm.OpcodeI64GtU wasm.OpcodeF32Gt wasm.OpcodeF64Gt -func newOperationGt(b signedType) unionOperation { - return unionOperation{Kind: operationKindGt, B1: byte(b)} -} - -// NewOperationLe is a constructor for unionOperation with operationKindLe. 
-// -// This corresponds to wasm.OpcodeI32LeS wasm.OpcodeI32LeU wasm.OpcodeI64LeS wasm.OpcodeI64LeU wasm.OpcodeF32Le wasm.OpcodeF64Le -func newOperationLe(b signedType) unionOperation { - return unionOperation{Kind: operationKindLe, B1: byte(b)} -} - -// NewOperationGe is a constructor for unionOperation with operationKindGe. -// -// This corresponds to wasm.OpcodeI32GeS wasm.OpcodeI32GeU wasm.OpcodeI64GeS wasm.OpcodeI64GeU wasm.OpcodeF32Ge wasm.OpcodeF64Ge -// NewOperationGe is the constructor for OperationGe -func newOperationGe(b signedType) unionOperation { - return unionOperation{Kind: operationKindGe, B1: byte(b)} -} - -// NewOperationAdd is a constructor for unionOperation with operationKindAdd. -// -// This corresponds to wasm.OpcodeI32AddName wasm.OpcodeI64AddName wasm.OpcodeF32AddName wasm.OpcodeF64AddName. -func newOperationAdd(b unsignedType) unionOperation { - return unionOperation{Kind: operationKindAdd, B1: byte(b)} -} - -// NewOperationSub is a constructor for unionOperation with operationKindSub. -// -// This corresponds to wasm.OpcodeI32SubName wasm.OpcodeI64SubName wasm.OpcodeF32SubName wasm.OpcodeF64SubName. -func newOperationSub(b unsignedType) unionOperation { - return unionOperation{Kind: operationKindSub, B1: byte(b)} -} - -// NewOperationMul is a constructor for unionOperation with wperationKindMul. -// -// This corresponds to wasm.OpcodeI32MulName wasm.OpcodeI64MulName wasm.OpcodeF32MulName wasm.OpcodeF64MulName. -// NewOperationMul is the constructor for OperationMul -func newOperationMul(b unsignedType) unionOperation { - return unionOperation{Kind: operationKindMul, B1: byte(b)} -} - -// NewOperationClz is a constructor for unionOperation with operationKindClz. -// -// This corresponds to wasm.OpcodeI32ClzName wasm.OpcodeI64ClzName. -// -// The engines are expected to count up the leading zeros in the -// current top of the stack, and push the count result. -// For example, stack of [..., 0x00_ff_ff_ff] results in [..., 8]. -// See wasm.OpcodeI32Clz wasm.OpcodeI64Clz -func newOperationClz(b unsignedInt) unionOperation { - return unionOperation{Kind: operationKindClz, B1: byte(b)} -} - -// NewOperationCtz is a constructor for unionOperation with operationKindCtz. -// -// This corresponds to wasm.OpcodeI32CtzName wasm.OpcodeI64CtzName. -// -// The engines are expected to count up the trailing zeros in the -// current top of the stack, and push the count result. -// For example, stack of [..., 0xff_ff_ff_00] results in [..., 8]. -func newOperationCtz(b unsignedInt) unionOperation { - return unionOperation{Kind: operationKindCtz, B1: byte(b)} -} - -// NewOperationPopcnt is a constructor for unionOperation with operationKindPopcnt. -// -// This corresponds to wasm.OpcodeI32PopcntName wasm.OpcodeI64PopcntName. -// -// The engines are expected to count up the number of set bits in the -// current top of the stack, and push the count result. -// For example, stack of [..., 0b00_00_00_11] results in [..., 2]. -func newOperationPopcnt(b unsignedInt) unionOperation { - return unionOperation{Kind: operationKindPopcnt, B1: byte(b)} -} - -// NewOperationDiv is a constructor for unionOperation with operationKindDiv. -// -// This corresponds to wasm.OpcodeI32DivS wasm.OpcodeI32DivU wasm.OpcodeI64DivS -// -// wasm.OpcodeI64DivU wasm.OpcodeF32Div wasm.OpcodeF64Div. -func newOperationDiv(b signedType) unionOperation { - return unionOperation{Kind: operationKindDiv, B1: byte(b)} -} - -// NewOperationRem is a constructor for unionOperation with operationKindRem. 
-// -// This corresponds to wasm.OpcodeI32RemS wasm.OpcodeI32RemU wasm.OpcodeI64RemS wasm.OpcodeI64RemU. -// -// The engines are expected to perform division on the top -// two values of integer type on the stack and puts the remainder of the result -// onto the stack. For example, stack [..., 10, 3] results in [..., 1] where -// the quotient is discarded. -// NewOperationRem is the constructor for OperationRem -func newOperationRem(b signedInt) unionOperation { - return unionOperation{Kind: operationKindRem, B1: byte(b)} -} - -// NewOperationAnd is a constructor for unionOperation with operationKindAnd. -// -// # This corresponds to wasm.OpcodeI32AndName wasm.OpcodeI64AndName -// -// The engines are expected to perform "And" operation on -// top two values on the stack, and pushes the result. -func newOperationAnd(b unsignedInt) unionOperation { - return unionOperation{Kind: operationKindAnd, B1: byte(b)} -} - -// NewOperationOr is a constructor for unionOperation with operationKindOr. -// -// # This corresponds to wasm.OpcodeI32OrName wasm.OpcodeI64OrName -// -// The engines are expected to perform "Or" operation on -// top two values on the stack, and pushes the result. -func newOperationOr(b unsignedInt) unionOperation { - return unionOperation{Kind: operationKindOr, B1: byte(b)} -} - -// NewOperationXor is a constructor for unionOperation with operationKindXor. -// -// # This corresponds to wasm.OpcodeI32XorName wasm.OpcodeI64XorName -// -// The engines are expected to perform "Xor" operation on -// top two values on the stack, and pushes the result. -func newOperationXor(b unsignedInt) unionOperation { - return unionOperation{Kind: operationKindXor, B1: byte(b)} -} - -// NewOperationShl is a constructor for unionOperation with operationKindShl. -// -// # This corresponds to wasm.OpcodeI32ShlName wasm.OpcodeI64ShlName -// -// The engines are expected to perform "Shl" operation on -// top two values on the stack, and pushes the result. -func newOperationShl(b unsignedInt) unionOperation { - return unionOperation{Kind: operationKindShl, B1: byte(b)} -} - -// NewOperationShr is a constructor for unionOperation with operationKindShr. -// -// # This corresponds to wasm.OpcodeI32ShrSName wasm.OpcodeI32ShrUName wasm.OpcodeI64ShrSName wasm.OpcodeI64ShrUName -// -// If OperationShr.Type is signed integer, then, the engines are expected to perform arithmetic right shift on the two -// top values on the stack, otherwise do the logical right shift. -func newOperationShr(b signedInt) unionOperation { - return unionOperation{Kind: operationKindShr, B1: byte(b)} -} - -// NewOperationRotl is a constructor for unionOperation with operationKindRotl. -// -// # This corresponds to wasm.OpcodeI32RotlName wasm.OpcodeI64RotlName -// -// The engines are expected to perform "Rotl" operation on -// top two values on the stack, and pushes the result. -func newOperationRotl(b unsignedInt) unionOperation { - return unionOperation{Kind: operationKindRotl, B1: byte(b)} -} - -// NewOperationRotr is a constructor for unionOperation with operationKindRotr. -// -// # This corresponds to wasm.OpcodeI32RotrName wasm.OpcodeI64RotrName -// -// The engines are expected to perform "Rotr" operation on -// top two values on the stack, and pushes the result. -func newOperationRotr(b unsignedInt) unionOperation { - return unionOperation{Kind: operationKindRotr, B1: byte(b)} -} - -// NewOperationAbs is a constructor for unionOperation with operationKindAbs. 
-//
-// This corresponds to wasm.OpcodeF32Abs wasm.OpcodeF64Abs
-func newOperationAbs(b float) unionOperation {
-	return unionOperation{Kind: operationKindAbs, B1: byte(b)}
-}
-
-// NewOperationNeg is a constructor for unionOperation with operationKindNeg.
-//
-// This corresponds to wasm.OpcodeF32Neg wasm.OpcodeF64Neg
-func newOperationNeg(b float) unionOperation {
-	return unionOperation{Kind: operationKindNeg, B1: byte(b)}
-}
-
-// NewOperationCeil is a constructor for unionOperation with operationKindCeil.
-//
-// This corresponds to wasm.OpcodeF32CeilName wasm.OpcodeF64CeilName
-func newOperationCeil(b float) unionOperation {
-	return unionOperation{Kind: operationKindCeil, B1: byte(b)}
-}
-
-// NewOperationFloor is a constructor for unionOperation with operationKindFloor.
-//
-// This corresponds to wasm.OpcodeF32FloorName wasm.OpcodeF64FloorName
-func newOperationFloor(b float) unionOperation {
-	return unionOperation{Kind: operationKindFloor, B1: byte(b)}
-}
-
-// NewOperationTrunc is a constructor for unionOperation with operationKindTrunc.
-//
-// This corresponds to wasm.OpcodeF32TruncName wasm.OpcodeF64TruncName
-func newOperationTrunc(b float) unionOperation {
-	return unionOperation{Kind: operationKindTrunc, B1: byte(b)}
-}
-
-// NewOperationNearest is a constructor for unionOperation with operationKindNearest.
-//
-// # This corresponds to wasm.OpcodeF32NearestName wasm.OpcodeF64NearestName
-//
-// Note: this is *not* equivalent to math.Round and instead has the same
-// semantics as LLVM's rint intrinsic. See https://llvm.org/docs/LangRef.html#llvm-rint-intrinsic.
-// For example, math.Round(-4.5) produces -5 while we want to produce -4.
-func newOperationNearest(b float) unionOperation {
-	return unionOperation{Kind: operationKindNearest, B1: byte(b)}
-}
-
-// NewOperationSqrt is a constructor for unionOperation with operationKindSqrt.
-//
-// This corresponds to wasm.OpcodeF32SqrtName wasm.OpcodeF64SqrtName
-func newOperationSqrt(b float) unionOperation {
-	return unionOperation{Kind: operationKindSqrt, B1: byte(b)}
-}
-
-// NewOperationMin is a constructor for unionOperation with operationKindMin.
-//
-// # This corresponds to wasm.OpcodeF32MinName wasm.OpcodeF64MinName
-//
-// The engines are expected to pop two values from the stack, and push back the minimum of
-// these two values onto the stack. For example, stack [..., 100.1, 1.9] results in [..., 1.9].
-//
-// Note: WebAssembly specifies that min/max must always return NaN if one of the values is NaN,
-// which is different behavior from math.Min.
-func newOperationMin(b float) unionOperation {
-	return unionOperation{Kind: operationKindMin, B1: byte(b)}
-}
-
-// NewOperationMax is a constructor for unionOperation with operationKindMax.
-//
-// # This corresponds to wasm.OpcodeF32MaxName wasm.OpcodeF64MaxName
-//
-// The engines are expected to pop two values from the stack, and push back the maximum of
-// these two values onto the stack. For example, stack [..., 100.1, 1.9] results in [..., 100.1].
-//
-// Note: WebAssembly specifies that min/max must always return NaN if one of the values is NaN,
-// which is different behavior from math.Max.
-func newOperationMax(b float) unionOperation {
-	return unionOperation{Kind: operationKindMax, B1: byte(b)}
-}
-
-// NewOperationCopysign is a constructor for unionOperation with operationKindCopysign.
-// -// # This corresponds to wasm.OpcodeF32CopysignName wasm.OpcodeF64CopysignName -// -// The engines are expected to pop two float values from the stack, and copy the signbit of -// the first-popped value to the last one. -// For example, stack [..., 1.213, -5.0] results in [..., -1.213]. -func newOperationCopysign(b float) unionOperation { - return unionOperation{Kind: operationKindCopysign, B1: byte(b)} -} - -// NewOperationI32WrapFromI64 is a constructor for unionOperation with operationKindI32WrapFromI64. -// -// This corresponds to wasm.OpcodeI32WrapI64 and equivalent to uint64(uint32(v)) in Go. -// -// The engines are expected to replace the 64-bit int on top of the stack -// with the corresponding 32-bit integer. -func newOperationI32WrapFromI64() unionOperation { - return unionOperation{Kind: operationKindI32WrapFromI64} -} - -// NewOperationITruncFromF is a constructor for unionOperation with operationKindITruncFromF. -// -// This corresponds to -// -// wasm.OpcodeI32TruncF32SName wasm.OpcodeI32TruncF32UName wasm.OpcodeI32TruncF64SName -// wasm.OpcodeI32TruncF64UName wasm.OpcodeI64TruncF32SName wasm.OpcodeI64TruncF32UName wasm.OpcodeI64TruncF64SName -// wasm.OpcodeI64TruncF64UName. wasm.OpcodeI32TruncSatF32SName wasm.OpcodeI32TruncSatF32UName -// wasm.OpcodeI32TruncSatF64SName wasm.OpcodeI32TruncSatF64UName wasm.OpcodeI64TruncSatF32SName -// wasm.OpcodeI64TruncSatF32UName wasm.OpcodeI64TruncSatF64SName wasm.OpcodeI64TruncSatF64UName -// -// See [1] and [2] for when we encounter undefined behavior in the WebAssembly specification if NewOperationITruncFromF.NonTrapping == false. -// To summarize, if the source float value is NaN or doesn't fit in the destination range of integers (incl. +=Inf), -// then the runtime behavior is undefined. In wazero, the engines are expected to exit the execution in these undefined cases with -// wasmruntime.ErrRuntimeInvalidConversionToInteger error. -// -// [1] https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#-hrefop-trunc-umathrmtruncmathsfu_m-n-z for unsigned integers. -// [2] https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#-hrefop-trunc-smathrmtruncmathsfs_m-n-z for signed integers. -// -// nonTrapping true if this conversion is "nontrapping" in the sense of the -// https://github.com/WebAssembly/spec/blob/ce4b6c4d47eb06098cc7ab2e81f24748da822f20/proposals/nontrapping-float-to-int-conversion/Overview.md -func newOperationITruncFromF(inputType float, outputType signedInt, nonTrapping bool) unionOperation { - return unionOperation{ - Kind: operationKindITruncFromF, - B1: byte(inputType), - B2: byte(outputType), - B3: nonTrapping, - } -} - -// NewOperationFConvertFromI is a constructor for unionOperation with operationKindFConvertFromI. -// -// This corresponds to -// -// wasm.OpcodeF32ConvertI32SName wasm.OpcodeF32ConvertI32UName wasm.OpcodeF32ConvertI64SName wasm.OpcodeF32ConvertI64UName -// wasm.OpcodeF64ConvertI32SName wasm.OpcodeF64ConvertI32UName wasm.OpcodeF64ConvertI64SName wasm.OpcodeF64ConvertI64UName -// -// and equivalent to float32(uint32(x)), float32(int32(x)), etc in Go. -func newOperationFConvertFromI(inputType signedInt, outputType float) unionOperation { - return unionOperation{ - Kind: operationKindFConvertFromI, - B1: byte(inputType), - B2: byte(outputType), - } -} - -// NewOperationF32DemoteFromF64 is a constructor for unionOperation with operationKindF32DemoteFromF64. -// -// This corresponds to wasm.OpcodeF32DemoteF64 and is equivalent float32(float64(v)). 
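The trapping versus non-trapping split described for newOperationITruncFromF is easiest to see in plain Go. The sketch below is illustrative only and is not wazero's implementation: truncF64ToI32 is a hypothetical helper for the i32.trunc_f64_s / i32.trunc_sat_f64_s pair summarized above, and it relies only on the standard "math" and "errors" packages.

    func truncF64ToI32(v float64, nonTrapping bool) (int32, error) {
        // Out of range covers NaN and any value whose truncation cannot be
        // represented as a signed 32-bit integer.
        outOfRange := math.IsNaN(v) || v >= 2147483648.0 || v <= -2147483649.0
        if !outOfRange {
            return int32(v), nil // Go's float-to-int conversion truncates toward zero
        }
        if !nonTrapping {
            // Trapping variant (i32.trunc_f64_s): the engine must abort with an
            // "invalid conversion to integer" runtime error.
            return 0, errors.New("invalid conversion to integer")
        }
        // Saturating variant (i32.trunc_sat_f64_s): NaN becomes 0, everything
        // else is clamped to the representable range.
        switch {
        case math.IsNaN(v):
            return 0, nil
        case v < 0:
            return math.MinInt32, nil
        default:
            return math.MaxInt32, nil
        }
    }

The same shape of check applies to the other seven input/output combinations; only the bounds and destination width change.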
-func newOperationF32DemoteFromF64() unionOperation { - return unionOperation{Kind: operationKindF32DemoteFromF64} -} - -// NewOperationF64PromoteFromF32 is a constructor for unionOperation with operationKindF64PromoteFromF32. -// -// This corresponds to wasm.OpcodeF64PromoteF32 and is equivalent float64(float32(v)). -func newOperationF64PromoteFromF32() unionOperation { - return unionOperation{Kind: operationKindF64PromoteFromF32} -} - -// NewOperationI32ReinterpretFromF32 is a constructor for unionOperation with operationKindI32ReinterpretFromF32. -// -// This corresponds to wasm.OpcodeI32ReinterpretF32Name. -func newOperationI32ReinterpretFromF32() unionOperation { - return unionOperation{Kind: operationKindI32ReinterpretFromF32} -} - -// NewOperationI64ReinterpretFromF64 is a constructor for unionOperation with operationKindI64ReinterpretFromF64. -// -// This corresponds to wasm.OpcodeI64ReinterpretF64Name. -func newOperationI64ReinterpretFromF64() unionOperation { - return unionOperation{Kind: operationKindI64ReinterpretFromF64} -} - -// NewOperationF32ReinterpretFromI32 is a constructor for unionOperation with operationKindF32ReinterpretFromI32. -// -// This corresponds to wasm.OpcodeF32ReinterpretI32Name. -func newOperationF32ReinterpretFromI32() unionOperation { - return unionOperation{Kind: operationKindF32ReinterpretFromI32} -} - -// NewOperationF64ReinterpretFromI64 is a constructor for unionOperation with operationKindF64ReinterpretFromI64. -// -// This corresponds to wasm.OpcodeF64ReinterpretI64Name. -func newOperationF64ReinterpretFromI64() unionOperation { - return unionOperation{Kind: operationKindF64ReinterpretFromI64} -} - -// NewOperationExtend is a constructor for unionOperation with operationKindExtend. -// -// # This corresponds to wasm.OpcodeI64ExtendI32SName wasm.OpcodeI64ExtendI32UName -// -// The engines are expected to extend the 32-bit signed or unsigned int on top of the stack -// as a 64-bit integer of corresponding signedness. For unsigned case, this is just reinterpreting the -// underlying bit pattern as 64-bit integer. For signed case, this is sign-extension which preserves the -// original integer's sign. -func newOperationExtend(signed bool) unionOperation { - op := unionOperation{Kind: operationKindExtend} - if signed { - op.B1 = 1 - } - return op -} - -// NewOperationSignExtend32From8 is a constructor for unionOperation with operationKindSignExtend32From8. -// -// This corresponds to wasm.OpcodeI32Extend8SName. -// -// The engines are expected to sign-extend the first 8-bits of 32-bit in as signed 32-bit int. -func newOperationSignExtend32From8() unionOperation { - return unionOperation{Kind: operationKindSignExtend32From8} -} - -// NewOperationSignExtend32From16 is a constructor for unionOperation with operationKindSignExtend32From16. -// -// This corresponds to wasm.OpcodeI32Extend16SName. -// -// The engines are expected to sign-extend the first 16-bits of 32-bit in as signed 32-bit int. -func newOperationSignExtend32From16() unionOperation { - return unionOperation{Kind: operationKindSignExtend32From16} -} - -// NewOperationSignExtend64From8 is a constructor for unionOperation with operationKindSignExtend64From8. -// -// This corresponds to wasm.OpcodeI64Extend8SName. -// -// The engines are expected to sign-extend the first 8-bits of 64-bit in as signed 32-bit int. 
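Sign extension as described in these comments maps directly onto Go's integer conversions. The helpers below are an illustrative sketch rather than the interpreter's code; they assume, for the purpose of the example, that each value lives in a uint64 stack slot with 32-bit values kept zero-extended.

    // signExtend32From8 mirrors i32.extend8_s: reinterpret the low 8 bits as a
    // signed byte and widen to 32 bits so the sign bit is propagated.
    func signExtend32From8(raw uint64) uint64 {
        return uint64(uint32(int32(int8(raw))))
    }

    // signExtend64From32 mirrors i64.extend32_s: the low 32 bits are treated as
    // a signed 32-bit integer and widened to the full 64-bit slot.
    func signExtend64From32(raw uint64) uint64 {
        return uint64(int64(int32(raw)))
    }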
-func newOperationSignExtend64From8() unionOperation {
-	return unionOperation{Kind: operationKindSignExtend64From8}
-}
-
-// NewOperationSignExtend64From16 is a constructor for unionOperation with operationKindSignExtend64From16.
-//
-// This corresponds to wasm.OpcodeI64Extend16SName.
-//
-// The engines are expected to sign-extend the first 16-bits of the 64-bit int as a signed 64-bit int.
-func newOperationSignExtend64From16() unionOperation {
-	return unionOperation{Kind: operationKindSignExtend64From16}
-}
-
-// NewOperationSignExtend64From32 is a constructor for unionOperation with operationKindSignExtend64From32.
-//
-// This corresponds to wasm.OpcodeI64Extend32SName.
-//
-// The engines are expected to sign-extend the first 32-bits of the 64-bit int as a signed 64-bit int.
-func newOperationSignExtend64From32() unionOperation {
-	return unionOperation{Kind: operationKindSignExtend64From32}
-}
-
-// NewOperationMemoryInit is a constructor for unionOperation with operationKindMemoryInit.
-//
-// This corresponds to wasm.OpcodeMemoryInitName.
-//
-// dataIndex is the index of the data instance in ModuleInstance.DataInstances
-// by which this operation instantiates a part of the memory.
-func newOperationMemoryInit(dataIndex uint32) unionOperation {
-	return unionOperation{Kind: operationKindMemoryInit, U1: uint64(dataIndex)}
-}
-
-// NewOperationDataDrop is a constructor for unionOperation with operationKindDataDrop.
-//
-// This corresponds to wasm.OpcodeDataDropName.
-//
-// dataIndex is the index of the data instance in ModuleInstance.DataInstances
-// which this operation drops.
-func newOperationDataDrop(dataIndex uint32) unionOperation {
-	return unionOperation{Kind: operationKindDataDrop, U1: uint64(dataIndex)}
-}
-
-// NewOperationMemoryCopy is a constructor for unionOperation with operationKindMemoryCopy.
-//
-// This corresponds to wasm.OpcodeMemoryCopyName.
-func newOperationMemoryCopy() unionOperation {
-	return unionOperation{Kind: operationKindMemoryCopy}
-}
-
-// NewOperationMemoryFill is a constructor for unionOperation with operationKindMemoryFill.
-func newOperationMemoryFill() unionOperation {
-	return unionOperation{Kind: operationKindMemoryFill}
-}
-
-// NewOperationTableInit is a constructor for unionOperation with operationKindTableInit.
-//
-// This corresponds to wasm.OpcodeTableInitName.
-//
-// elemIndex is the index of the element by which this operation initializes a part of the table.
-// tableIndex is the index of the table which this operation initializes with the target element.
-func newOperationTableInit(elemIndex, tableIndex uint32) unionOperation {
-	return unionOperation{Kind: operationKindTableInit, U1: uint64(elemIndex), U2: uint64(tableIndex)}
-}
-
-// NewOperationElemDrop is a constructor for unionOperation with operationKindElemDrop.
-//
-// This corresponds to wasm.OpcodeElemDropName.
-//
-// elemIndex is the index of the element which this operation drops.
-func newOperationElemDrop(elemIndex uint32) unionOperation {
-	return unionOperation{Kind: operationKindElemDrop, U1: uint64(elemIndex)}
-}
-
-// NewOperationTableCopy is a constructor for unionOperation with operationKindTableCopy.
-//
-// This corresponds to wasm.OpcodeTableCopyName.
-func newOperationTableCopy(srcTableIndex, dstTableIndex uint32) unionOperation {
-	return unionOperation{Kind: operationKindTableCopy, U1: uint64(srcTableIndex), U2: uint64(dstTableIndex)}
-}
-
-// NewOperationRefFunc is a constructor for unionOperation with operationKindRefFunc.
-// -// This corresponds to wasm.OpcodeRefFuncName, and engines are expected to -// push the opaque pointer value of engine specific func for the given FunctionIndex. -// -// Note: in wazero, we express any reference types (funcref or externref) as opaque pointers which is uint64. -// Therefore, the engine implementations emit instructions to push the address of *function onto the stack. -func newOperationRefFunc(functionIndex uint32) unionOperation { - return unionOperation{Kind: operationKindRefFunc, U1: uint64(functionIndex)} -} - -// NewOperationTableGet constructor for unionOperation with operationKindTableGet. -// -// This corresponds to wasm.OpcodeTableGetName. -func newOperationTableGet(tableIndex uint32) unionOperation { - return unionOperation{Kind: operationKindTableGet, U1: uint64(tableIndex)} -} - -// NewOperationTableSet constructor for unionOperation with operationKindTableSet. -// -// This corresponds to wasm.OpcodeTableSetName. -func newOperationTableSet(tableIndex uint32) unionOperation { - return unionOperation{Kind: operationKindTableSet, U1: uint64(tableIndex)} -} - -// NewOperationTableSize constructor for unionOperation with operationKindTableSize. -// -// This corresponds to wasm.OpcodeTableSizeName. -func newOperationTableSize(tableIndex uint32) unionOperation { - return unionOperation{Kind: operationKindTableSize, U1: uint64(tableIndex)} -} - -// NewOperationTableGrow constructor for unionOperation with operationKindTableGrow. -// -// This corresponds to wasm.OpcodeTableGrowName. -func newOperationTableGrow(tableIndex uint32) unionOperation { - return unionOperation{Kind: operationKindTableGrow, U1: uint64(tableIndex)} -} - -// NewOperationTableFill constructor for unionOperation with operationKindTableFill. -// -// This corresponds to wasm.OpcodeTableFillName. -func newOperationTableFill(tableIndex uint32) unionOperation { - return unionOperation{Kind: operationKindTableFill, U1: uint64(tableIndex)} -} - -// NewOperationV128Const constructor for unionOperation with operationKindV128Const -func newOperationV128Const(lo, hi uint64) unionOperation { - return unionOperation{Kind: operationKindV128Const, U1: lo, U2: hi} -} - -// shape corresponds to a shape of v128 values. -// https://webassembly.github.io/spec/core/syntax/instructions.html#syntax-shape -type shape = byte - -const ( - shapeI8x16 shape = iota - shapeI16x8 - shapeI32x4 - shapeI64x2 - shapeF32x4 - shapeF64x2 -) - -func shapeName(s shape) (ret string) { - switch s { - case shapeI8x16: - ret = "I8x16" - case shapeI16x8: - ret = "I16x8" - case shapeI32x4: - ret = "I32x4" - case shapeI64x2: - ret = "I64x2" - case shapeF32x4: - ret = "F32x4" - case shapeF64x2: - ret = "F64x2" - } - return -} - -// NewOperationV128Add constructor for unionOperation with operationKindV128Add. -// -// This corresponds to wasm.OpcodeVecI8x16AddName wasm.OpcodeVecI16x8AddName wasm.OpcodeVecI32x4AddName -// -// wasm.OpcodeVecI64x2AddName wasm.OpcodeVecF32x4AddName wasm.OpcodeVecF64x2AddName -func newOperationV128Add(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128Add, B1: shape} -} - -// NewOperationV128Sub constructor for unionOperation with operationKindV128Sub. 
-// -// This corresponds to wasm.OpcodeVecI8x16SubName wasm.OpcodeVecI16x8SubName wasm.OpcodeVecI32x4SubName -// -// wasm.OpcodeVecI64x2SubName wasm.OpcodeVecF32x4SubName wasm.OpcodeVecF64x2SubName -func newOperationV128Sub(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128Sub, B1: shape} -} - -// v128LoadType represents a type of wasm.OpcodeVecV128Load* instructions. -type v128LoadType = byte - -const ( - // v128LoadType128 corresponds to wasm.OpcodeVecV128LoadName. - v128LoadType128 v128LoadType = iota - // v128LoadType8x8s corresponds to wasm.OpcodeVecV128Load8x8SName. - v128LoadType8x8s - // v128LoadType8x8u corresponds to wasm.OpcodeVecV128Load8x8UName. - v128LoadType8x8u - // v128LoadType16x4s corresponds to wasm.OpcodeVecV128Load16x4SName - v128LoadType16x4s - // v128LoadType16x4u corresponds to wasm.OpcodeVecV128Load16x4UName - v128LoadType16x4u - // v128LoadType32x2s corresponds to wasm.OpcodeVecV128Load32x2SName - v128LoadType32x2s - // v128LoadType32x2u corresponds to wasm.OpcodeVecV128Load32x2UName - v128LoadType32x2u - // v128LoadType8Splat corresponds to wasm.OpcodeVecV128Load8SplatName - v128LoadType8Splat - // v128LoadType16Splat corresponds to wasm.OpcodeVecV128Load16SplatName - v128LoadType16Splat - // v128LoadType32Splat corresponds to wasm.OpcodeVecV128Load32SplatName - v128LoadType32Splat - // v128LoadType64Splat corresponds to wasm.OpcodeVecV128Load64SplatName - v128LoadType64Splat - // v128LoadType32zero corresponds to wasm.OpcodeVecV128Load32zeroName - v128LoadType32zero - // v128LoadType64zero corresponds to wasm.OpcodeVecV128Load64zeroName - v128LoadType64zero -) - -// NewOperationV128Load is a constructor for unionOperation with operationKindV128Load. -// -// This corresponds to -// -// wasm.OpcodeVecV128LoadName wasm.OpcodeVecV128Load8x8SName wasm.OpcodeVecV128Load8x8UName -// wasm.OpcodeVecV128Load16x4SName wasm.OpcodeVecV128Load16x4UName wasm.OpcodeVecV128Load32x2SName -// wasm.OpcodeVecV128Load32x2UName wasm.OpcodeVecV128Load8SplatName wasm.OpcodeVecV128Load16SplatName -// wasm.OpcodeVecV128Load32SplatName wasm.OpcodeVecV128Load64SplatName wasm.OpcodeVecV128Load32zeroName -// wasm.OpcodeVecV128Load64zeroName -func newOperationV128Load(loadType v128LoadType, arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindV128Load, B1: loadType, U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationV128LoadLane is a constructor for unionOperation with operationKindV128LoadLane. -// -// This corresponds to wasm.OpcodeVecV128Load8LaneName wasm.OpcodeVecV128Load16LaneName -// -// wasm.OpcodeVecV128Load32LaneName wasm.OpcodeVecV128Load64LaneName. -// -// laneIndex is >=0 && <(128/LaneSize). -// laneSize is either 8, 16, 32, or 64. -func newOperationV128LoadLane(laneIndex, laneSize byte, arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindV128LoadLane, B1: laneSize, B2: laneIndex, U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationV128Store is a constructor for unionOperation with operationKindV128Store. -// -// This corresponds to wasm.OpcodeVecV128Load8LaneName wasm.OpcodeVecV128Load16LaneName -// -// wasm.OpcodeVecV128Load32LaneName wasm.OpcodeVecV128Load64LaneName. -func newOperationV128Store(arg memoryArg) unionOperation { - return unionOperation{ - Kind: operationKindV128Store, - U1: uint64(arg.Alignment), - U2: uint64(arg.Offset), - } -} - -// NewOperationV128StoreLane implements Operation. 
-// -// This corresponds to wasm.OpcodeVecV128Load8LaneName wasm.OpcodeVecV128Load16LaneName -// -// wasm.OpcodeVecV128Load32LaneName wasm.OpcodeVecV128Load64LaneName. -// -// laneIndex is >=0 && <(128/LaneSize). -// laneSize is either 8, 16, 32, or 64. -func newOperationV128StoreLane(laneIndex byte, laneSize byte, arg memoryArg) unionOperation { - return unionOperation{ - Kind: operationKindV128StoreLane, - B1: laneSize, - B2: laneIndex, - U1: uint64(arg.Alignment), - U2: uint64(arg.Offset), - } -} - -// NewOperationV128ExtractLane is a constructor for unionOperation with operationKindV128ExtractLane. -// -// This corresponds to -// -// wasm.OpcodeVecI8x16ExtractLaneSName wasm.OpcodeVecI8x16ExtractLaneUName -// wasm.OpcodeVecI16x8ExtractLaneSName wasm.OpcodeVecI16x8ExtractLaneUName -// wasm.OpcodeVecI32x4ExtractLaneName wasm.OpcodeVecI64x2ExtractLaneName -// wasm.OpcodeVecF32x4ExtractLaneName wasm.OpcodeVecF64x2ExtractLaneName. -// -// laneIndex is >=0 && <M where shape = NxM. -// signed is used when shape is either i8x16 or i16x2 to specify whether to sign-extend or not. -func newOperationV128ExtractLane(laneIndex byte, signed bool, shape shape) unionOperation { - return unionOperation{ - Kind: operationKindV128ExtractLane, - B1: shape, - B2: laneIndex, - B3: signed, - } -} - -// NewOperationV128ReplaceLane is a constructor for unionOperation with operationKindV128ReplaceLane. -// -// This corresponds to -// -// wasm.OpcodeVecI8x16ReplaceLaneName wasm.OpcodeVecI16x8ReplaceLaneName -// wasm.OpcodeVecI32x4ReplaceLaneName wasm.OpcodeVecI64x2ReplaceLaneName -// wasm.OpcodeVecF32x4ReplaceLaneName wasm.OpcodeVecF64x2ReplaceLaneName. -// -// laneIndex is >=0 && <M where shape = NxM. -func newOperationV128ReplaceLane(laneIndex byte, shape shape) unionOperation { - return unionOperation{Kind: operationKindV128ReplaceLane, B1: shape, B2: laneIndex} -} - -// NewOperationV128Splat is a constructor for unionOperation with operationKindV128Splat. -// -// This corresponds to -// -// wasm.OpcodeVecI8x16SplatName wasm.OpcodeVecI16x8SplatName -// wasm.OpcodeVecI32x4SplatName wasm.OpcodeVecI64x2SplatName -// wasm.OpcodeVecF32x4SplatName wasm.OpcodeVecF64x2SplatName. -func newOperationV128Splat(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128Splat, B1: shape} -} - -// NewOperationV128Shuffle is a constructor for unionOperation with operationKindV128Shuffle. -func newOperationV128Shuffle(lanes []uint64) unionOperation { - return unionOperation{Kind: operationKindV128Shuffle, Us: lanes} -} - -// NewOperationV128Swizzle is a constructor for unionOperation with operationKindV128Swizzle. -// -// This corresponds to wasm.OpcodeVecI8x16SwizzleName. -func newOperationV128Swizzle() unionOperation { - return unionOperation{Kind: operationKindV128Swizzle} -} - -// NewOperationV128AnyTrue is a constructor for unionOperation with operationKindV128AnyTrue. -// -// This corresponds to wasm.OpcodeVecV128AnyTrueName. -func newOperationV128AnyTrue() unionOperation { - return unionOperation{Kind: operationKindV128AnyTrue} -} - -// NewOperationV128AllTrue is a constructor for unionOperation with operationKindV128AllTrue. -// -// This corresponds to -// -// wasm.OpcodeVecI8x16AllTrueName wasm.OpcodeVecI16x8AllTrueName -// wasm.OpcodeVecI32x4AllTrueName wasm.OpcodeVecI64x2AllTrueName. 
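Lane addressing for the load-lane, store-lane, extract-lane and replace-lane operations above is easiest to picture on the two uint64 halves of a v128 value. The helper below is a sketch of an i8x16 extract_lane with the laneIndex/signed parameters described earlier; it assumes lanes 0-7 live in the low half and lanes 8-15 in the high half, and it is not wazero's actual implementation.

    func extractLaneI8x16(lo, hi uint64, laneIndex byte, signed bool) uint32 {
        word := lo
        if laneIndex >= 8 {
            word = hi
            laneIndex -= 8
        }
        lane := byte(word >> (8 * uint(laneIndex))) // isolate the 8-bit lane
        if signed {
            return uint32(int32(int8(lane))) // i8x16.extract_lane_s: sign-extend into i32
        }
        return uint32(lane) // i8x16.extract_lane_u: zero-extend into i32
    }

Replace-lane is the mirror image: mask the target byte out of the chosen half and OR the new value in at the same shift.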
-func newOperationV128AllTrue(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128AllTrue, B1: shape} -} - -// NewOperationV128BitMask is a constructor for unionOperation with operationKindV128BitMask. -// -// This corresponds to -// -// wasm.OpcodeVecI8x16BitMaskName wasm.OpcodeVecI16x8BitMaskName -// wasm.OpcodeVecI32x4BitMaskName wasm.OpcodeVecI64x2BitMaskName. -func newOperationV128BitMask(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128BitMask, B1: shape} -} - -// NewOperationV128And is a constructor for unionOperation with operationKindV128And. -// -// This corresponds to wasm.OpcodeVecV128And. -func newOperationV128And() unionOperation { - return unionOperation{Kind: operationKindV128And} -} - -// NewOperationV128Not is a constructor for unionOperation with operationKindV128Not. -// -// This corresponds to wasm.OpcodeVecV128Not. -func newOperationV128Not() unionOperation { - return unionOperation{Kind: operationKindV128Not} -} - -// NewOperationV128Or is a constructor for unionOperation with operationKindV128Or. -// -// This corresponds to wasm.OpcodeVecV128Or. -func newOperationV128Or() unionOperation { - return unionOperation{Kind: operationKindV128Or} -} - -// NewOperationV128Xor is a constructor for unionOperation with operationKindV128Xor. -// -// This corresponds to wasm.OpcodeVecV128Xor. -func newOperationV128Xor() unionOperation { - return unionOperation{Kind: operationKindV128Xor} -} - -// NewOperationV128Bitselect is a constructor for unionOperation with operationKindV128Bitselect. -// -// This corresponds to wasm.OpcodeVecV128Bitselect. -func newOperationV128Bitselect() unionOperation { - return unionOperation{Kind: operationKindV128Bitselect} -} - -// NewOperationV128AndNot is a constructor for unionOperation with operationKindV128AndNot. -// -// This corresponds to wasm.OpcodeVecV128AndNot. -func newOperationV128AndNot() unionOperation { - return unionOperation{Kind: operationKindV128AndNot} -} - -// NewOperationV128Shl is a constructor for unionOperation with operationKindV128Shl. -// -// This corresponds to -// -// wasm.OpcodeVecI8x16ShlName wasm.OpcodeVecI16x8ShlName -// wasm.OpcodeVecI32x4ShlName wasm.OpcodeVecI64x2ShlName -func newOperationV128Shl(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128Shl, B1: shape} -} - -// NewOperationV128Shr is a constructor for unionOperation with operationKindV128Shr. -// -// This corresponds to -// -// wasm.OpcodeVecI8x16ShrSName wasm.OpcodeVecI8x16ShrUName wasm.OpcodeVecI16x8ShrSName -// wasm.OpcodeVecI16x8ShrUName wasm.OpcodeVecI32x4ShrSName wasm.OpcodeVecI32x4ShrUName. -// wasm.OpcodeVecI64x2ShrSName wasm.OpcodeVecI64x2ShrUName. -func newOperationV128Shr(shape shape, signed bool) unionOperation { - return unionOperation{Kind: operationKindV128Shr, B1: shape, B3: signed} -} - -// NewOperationV128Cmp is a constructor for unionOperation with operationKindV128Cmp. 
-// -// This corresponds to -// -// wasm.OpcodeVecI8x16EqName, wasm.OpcodeVecI8x16NeName, wasm.OpcodeVecI8x16LtSName, wasm.OpcodeVecI8x16LtUName, wasm.OpcodeVecI8x16GtSName, -// wasm.OpcodeVecI8x16GtUName, wasm.OpcodeVecI8x16LeSName, wasm.OpcodeVecI8x16LeUName, wasm.OpcodeVecI8x16GeSName, wasm.OpcodeVecI8x16GeUName, -// wasm.OpcodeVecI16x8EqName, wasm.OpcodeVecI16x8NeName, wasm.OpcodeVecI16x8LtSName, wasm.OpcodeVecI16x8LtUName, wasm.OpcodeVecI16x8GtSName, -// wasm.OpcodeVecI16x8GtUName, wasm.OpcodeVecI16x8LeSName, wasm.OpcodeVecI16x8LeUName, wasm.OpcodeVecI16x8GeSName, wasm.OpcodeVecI16x8GeUName, -// wasm.OpcodeVecI32x4EqName, wasm.OpcodeVecI32x4NeName, wasm.OpcodeVecI32x4LtSName, wasm.OpcodeVecI32x4LtUName, wasm.OpcodeVecI32x4GtSName, -// wasm.OpcodeVecI32x4GtUName, wasm.OpcodeVecI32x4LeSName, wasm.OpcodeVecI32x4LeUName, wasm.OpcodeVecI32x4GeSName, wasm.OpcodeVecI32x4GeUName, -// wasm.OpcodeVecI64x2EqName, wasm.OpcodeVecI64x2NeName, wasm.OpcodeVecI64x2LtSName, wasm.OpcodeVecI64x2GtSName, wasm.OpcodeVecI64x2LeSName, -// wasm.OpcodeVecI64x2GeSName, wasm.OpcodeVecF32x4EqName, wasm.OpcodeVecF32x4NeName, wasm.OpcodeVecF32x4LtName, wasm.OpcodeVecF32x4GtName, -// wasm.OpcodeVecF32x4LeName, wasm.OpcodeVecF32x4GeName, wasm.OpcodeVecF64x2EqName, wasm.OpcodeVecF64x2NeName, wasm.OpcodeVecF64x2LtName, -// wasm.OpcodeVecF64x2GtName, wasm.OpcodeVecF64x2LeName, wasm.OpcodeVecF64x2GeName -func newOperationV128Cmp(cmpType v128CmpType) unionOperation { - return unionOperation{Kind: operationKindV128Cmp, B1: cmpType} -} - -// v128CmpType represents a type of vector comparison operation. -type v128CmpType = byte - -const ( - // v128CmpTypeI8x16Eq corresponds to wasm.OpcodeVecI8x16EqName. - v128CmpTypeI8x16Eq v128CmpType = iota - // v128CmpTypeI8x16Ne corresponds to wasm.OpcodeVecI8x16NeName. - v128CmpTypeI8x16Ne - // v128CmpTypeI8x16LtS corresponds to wasm.OpcodeVecI8x16LtSName. - v128CmpTypeI8x16LtS - // v128CmpTypeI8x16LtU corresponds to wasm.OpcodeVecI8x16LtUName. - v128CmpTypeI8x16LtU - // v128CmpTypeI8x16GtS corresponds to wasm.OpcodeVecI8x16GtSName. - v128CmpTypeI8x16GtS - // v128CmpTypeI8x16GtU corresponds to wasm.OpcodeVecI8x16GtUName. - v128CmpTypeI8x16GtU - // v128CmpTypeI8x16LeS corresponds to wasm.OpcodeVecI8x16LeSName. - v128CmpTypeI8x16LeS - // v128CmpTypeI8x16LeU corresponds to wasm.OpcodeVecI8x16LeUName. - v128CmpTypeI8x16LeU - // v128CmpTypeI8x16GeS corresponds to wasm.OpcodeVecI8x16GeSName. - v128CmpTypeI8x16GeS - // v128CmpTypeI8x16GeU corresponds to wasm.OpcodeVecI8x16GeUName. - v128CmpTypeI8x16GeU - // v128CmpTypeI16x8Eq corresponds to wasm.OpcodeVecI16x8EqName. - v128CmpTypeI16x8Eq - // v128CmpTypeI16x8Ne corresponds to wasm.OpcodeVecI16x8NeName. - v128CmpTypeI16x8Ne - // v128CmpTypeI16x8LtS corresponds to wasm.OpcodeVecI16x8LtSName. - v128CmpTypeI16x8LtS - // v128CmpTypeI16x8LtU corresponds to wasm.OpcodeVecI16x8LtUName. - v128CmpTypeI16x8LtU - // v128CmpTypeI16x8GtS corresponds to wasm.OpcodeVecI16x8GtSName. - v128CmpTypeI16x8GtS - // v128CmpTypeI16x8GtU corresponds to wasm.OpcodeVecI16x8GtUName. - v128CmpTypeI16x8GtU - // v128CmpTypeI16x8LeS corresponds to wasm.OpcodeVecI16x8LeSName. - v128CmpTypeI16x8LeS - // v128CmpTypeI16x8LeU corresponds to wasm.OpcodeVecI16x8LeUName. - v128CmpTypeI16x8LeU - // v128CmpTypeI16x8GeS corresponds to wasm.OpcodeVecI16x8GeSName. - v128CmpTypeI16x8GeS - // v128CmpTypeI16x8GeU corresponds to wasm.OpcodeVecI16x8GeUName. - v128CmpTypeI16x8GeU - // v128CmpTypeI32x4Eq corresponds to wasm.OpcodeVecI32x4EqName. 
- v128CmpTypeI32x4Eq - // v128CmpTypeI32x4Ne corresponds to wasm.OpcodeVecI32x4NeName. - v128CmpTypeI32x4Ne - // v128CmpTypeI32x4LtS corresponds to wasm.OpcodeVecI32x4LtSName. - v128CmpTypeI32x4LtS - // v128CmpTypeI32x4LtU corresponds to wasm.OpcodeVecI32x4LtUName. - v128CmpTypeI32x4LtU - // v128CmpTypeI32x4GtS corresponds to wasm.OpcodeVecI32x4GtSName. - v128CmpTypeI32x4GtS - // v128CmpTypeI32x4GtU corresponds to wasm.OpcodeVecI32x4GtUName. - v128CmpTypeI32x4GtU - // v128CmpTypeI32x4LeS corresponds to wasm.OpcodeVecI32x4LeSName. - v128CmpTypeI32x4LeS - // v128CmpTypeI32x4LeU corresponds to wasm.OpcodeVecI32x4LeUName. - v128CmpTypeI32x4LeU - // v128CmpTypeI32x4GeS corresponds to wasm.OpcodeVecI32x4GeSName. - v128CmpTypeI32x4GeS - // v128CmpTypeI32x4GeU corresponds to wasm.OpcodeVecI32x4GeUName. - v128CmpTypeI32x4GeU - // v128CmpTypeI64x2Eq corresponds to wasm.OpcodeVecI64x2EqName. - v128CmpTypeI64x2Eq - // v128CmpTypeI64x2Ne corresponds to wasm.OpcodeVecI64x2NeName. - v128CmpTypeI64x2Ne - // v128CmpTypeI64x2LtS corresponds to wasm.OpcodeVecI64x2LtSName. - v128CmpTypeI64x2LtS - // v128CmpTypeI64x2GtS corresponds to wasm.OpcodeVecI64x2GtSName. - v128CmpTypeI64x2GtS - // v128CmpTypeI64x2LeS corresponds to wasm.OpcodeVecI64x2LeSName. - v128CmpTypeI64x2LeS - // v128CmpTypeI64x2GeS corresponds to wasm.OpcodeVecI64x2GeSName. - v128CmpTypeI64x2GeS - // v128CmpTypeF32x4Eq corresponds to wasm.OpcodeVecF32x4EqName. - v128CmpTypeF32x4Eq - // v128CmpTypeF32x4Ne corresponds to wasm.OpcodeVecF32x4NeName. - v128CmpTypeF32x4Ne - // v128CmpTypeF32x4Lt corresponds to wasm.OpcodeVecF32x4LtName. - v128CmpTypeF32x4Lt - // v128CmpTypeF32x4Gt corresponds to wasm.OpcodeVecF32x4GtName. - v128CmpTypeF32x4Gt - // v128CmpTypeF32x4Le corresponds to wasm.OpcodeVecF32x4LeName. - v128CmpTypeF32x4Le - // v128CmpTypeF32x4Ge corresponds to wasm.OpcodeVecF32x4GeName. - v128CmpTypeF32x4Ge - // v128CmpTypeF64x2Eq corresponds to wasm.OpcodeVecF64x2EqName. - v128CmpTypeF64x2Eq - // v128CmpTypeF64x2Ne corresponds to wasm.OpcodeVecF64x2NeName. - v128CmpTypeF64x2Ne - // v128CmpTypeF64x2Lt corresponds to wasm.OpcodeVecF64x2LtName. - v128CmpTypeF64x2Lt - // v128CmpTypeF64x2Gt corresponds to wasm.OpcodeVecF64x2GtName. - v128CmpTypeF64x2Gt - // v128CmpTypeF64x2Le corresponds to wasm.OpcodeVecF64x2LeName. - v128CmpTypeF64x2Le - // v128CmpTypeF64x2Ge corresponds to wasm.OpcodeVecF64x2GeName. - v128CmpTypeF64x2Ge -) - -// NewOperationV128AddSat is a constructor for unionOperation with operationKindV128AddSat. -// -// This corresponds to wasm.OpcodeVecI8x16AddSatUName wasm.OpcodeVecI8x16AddSatSName -// -// wasm.OpcodeVecI16x8AddSatUName wasm.OpcodeVecI16x8AddSatSName -// -// shape is either shapeI8x16 or shapeI16x8. -func newOperationV128AddSat(shape shape, signed bool) unionOperation { - return unionOperation{Kind: operationKindV128AddSat, B1: shape, B3: signed} -} - -// NewOperationV128SubSat is a constructor for unionOperation with operationKindV128SubSat. -// -// This corresponds to wasm.OpcodeVecI8x16SubSatUName wasm.OpcodeVecI8x16SubSatSName -// -// wasm.OpcodeVecI16x8SubSatUName wasm.OpcodeVecI16x8SubSatSName -// -// shape is either shapeI8x16 or shapeI16x8. 
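Saturating arithmetic, as used by the AddSat and SubSat operations here, clamps the result to the lane's representable range instead of wrapping. A minimal per-lane sketch for the signed i8 case follows; it is illustrative only (the real operations apply this across all sixteen lanes) and assumes the standard "math" package for the integer bounds.

    func addSatI8(a, b int8) int8 {
        s := int16(a) + int16(b) // widen so the true sum cannot overflow
        if s > math.MaxInt8 {
            return math.MaxInt8
        }
        if s < math.MinInt8 {
            return math.MinInt8
        }
        return int8(s)
    }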
-func newOperationV128SubSat(shape shape, signed bool) unionOperation { - return unionOperation{Kind: operationKindV128SubSat, B1: shape, B3: signed} -} - -// NewOperationV128Mul is a constructor for unionOperation with operationKindV128Mul -// -// This corresponds to wasm.OpcodeVecF32x4MulName wasm.OpcodeVecF64x2MulName -// -// wasm.OpcodeVecI16x8MulName wasm.OpcodeVecI32x4MulName wasm.OpcodeVecI64x2MulName. -// shape is either shapeI16x8, shapeI32x4, shapeI64x2, shapeF32x4 or shapeF64x2. -func newOperationV128Mul(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128Mul, B1: shape} -} - -// NewOperationV128Div is a constructor for unionOperation with operationKindV128Div. -// -// This corresponds to wasm.OpcodeVecF32x4DivName wasm.OpcodeVecF64x2DivName. -// shape is either shapeF32x4 or shapeF64x2. -func newOperationV128Div(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128Div, B1: shape} -} - -// NewOperationV128Neg is a constructor for unionOperation with operationKindV128Neg. -// -// This corresponds to wasm.OpcodeVecI8x16NegName wasm.OpcodeVecI16x8NegName wasm.OpcodeVecI32x4NegName -// -// wasm.OpcodeVecI64x2NegName wasm.OpcodeVecF32x4NegName wasm.OpcodeVecF64x2NegName. -func newOperationV128Neg(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128Neg, B1: shape} -} - -// NewOperationV128Sqrt is a constructor for unionOperation with 128operationKindV128Sqrt. -// -// shape is either shapeF32x4 or shapeF64x2. -// This corresponds to wasm.OpcodeVecF32x4SqrtName wasm.OpcodeVecF64x2SqrtName. -func newOperationV128Sqrt(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128Sqrt, B1: shape} -} - -// NewOperationV128Abs is a constructor for unionOperation with operationKindV128Abs. -// -// This corresponds to wasm.OpcodeVecI8x16AbsName wasm.OpcodeVecI16x8AbsName wasm.OpcodeVecI32x4AbsName -// -// wasm.OpcodeVecI64x2AbsName wasm.OpcodeVecF32x4AbsName wasm.OpcodeVecF64x2AbsName. -func newOperationV128Abs(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128Abs, B1: shape} -} - -// NewOperationV128Popcnt is a constructor for unionOperation with operationKindV128Popcnt. -// -// This corresponds to wasm.OpcodeVecI8x16PopcntName. -func newOperationV128Popcnt(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128Popcnt, B1: shape} -} - -// NewOperationV128Min is a constructor for unionOperation with operationKindV128Min. -// -// This corresponds to -// -// wasm.OpcodeVecI8x16MinSName wasm.OpcodeVecI8x16MinUName wasm.OpcodeVecI16x8MinSName wasm.OpcodeVecI16x8MinUName -// wasm.OpcodeVecI32x4MinSName wasm.OpcodeVecI32x4MinUName wasm.OpcodeVecI16x8MinSName wasm.OpcodeVecI16x8MinUName -// wasm.OpcodeVecF32x4MinName wasm.OpcodeVecF64x2MinName -func newOperationV128Min(shape shape, signed bool) unionOperation { - return unionOperation{Kind: operationKindV128Min, B1: shape, B3: signed} -} - -// NewOperationV128Max is a constructor for unionOperation with operationKindV128Max. -// -// This corresponds to -// -// wasm.OpcodeVecI8x16MaxSName wasm.OpcodeVecI8x16MaxUName wasm.OpcodeVecI16x8MaxSName wasm.OpcodeVecI16x8MaxUName -// wasm.OpcodeVecI32x4MaxSName wasm.OpcodeVecI32x4MaxUName wasm.OpcodeVecI16x8MaxSName wasm.OpcodeVecI16x8MaxUName -// wasm.OpcodeVecF32x4MaxName wasm.OpcodeVecF64x2MaxName. 
-func newOperationV128Max(shape shape, signed bool) unionOperation { - return unionOperation{Kind: operationKindV128Max, B1: shape, B3: signed} -} - -// NewOperationV128AvgrU is a constructor for unionOperation with operationKindV128AvgrU. -// -// This corresponds to wasm.OpcodeVecI8x16AvgrUName. -func newOperationV128AvgrU(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128AvgrU, B1: shape} -} - -// NewOperationV128Pmin is a constructor for unionOperation with operationKindV128Pmin. -// -// This corresponds to wasm.OpcodeVecF32x4PminName wasm.OpcodeVecF64x2PminName. -func newOperationV128Pmin(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128Pmin, B1: shape} -} - -// NewOperationV128Pmax is a constructor for unionOperation with operationKindV128Pmax. -// -// This corresponds to wasm.OpcodeVecF32x4PmaxName wasm.OpcodeVecF64x2PmaxName. -func newOperationV128Pmax(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128Pmax, B1: shape} -} - -// NewOperationV128Ceil is a constructor for unionOperation with operationKindV128Ceil. -// -// This corresponds to wasm.OpcodeVecF32x4CeilName wasm.OpcodeVecF64x2CeilName -func newOperationV128Ceil(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128Ceil, B1: shape} -} - -// NewOperationV128Floor is a constructor for unionOperation with operationKindV128Floor. -// -// This corresponds to wasm.OpcodeVecF32x4FloorName wasm.OpcodeVecF64x2FloorName -func newOperationV128Floor(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128Floor, B1: shape} -} - -// NewOperationV128Trunc is a constructor for unionOperation with operationKindV128Trunc. -// -// This corresponds to wasm.OpcodeVecF32x4TruncName wasm.OpcodeVecF64x2TruncName -func newOperationV128Trunc(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128Trunc, B1: shape} -} - -// NewOperationV128Nearest is a constructor for unionOperation with operationKindV128Nearest. -// -// This corresponds to wasm.OpcodeVecF32x4NearestName wasm.OpcodeVecF64x2NearestName -func newOperationV128Nearest(shape shape) unionOperation { - return unionOperation{Kind: operationKindV128Nearest, B1: shape} -} - -// NewOperationV128Extend is a constructor for unionOperation with operationKindV128Extend. -// -// This corresponds to -// -// wasm.OpcodeVecI16x8ExtendLowI8x16SName wasm.OpcodeVecI16x8ExtendHighI8x16SName -// wasm.OpcodeVecI16x8ExtendLowI8x16UName wasm.OpcodeVecI16x8ExtendHighI8x16UName -// wasm.OpcodeVecI32x4ExtendLowI16x8SName wasm.OpcodeVecI32x4ExtendHighI16x8SName -// wasm.OpcodeVecI32x4ExtendLowI16x8UName wasm.OpcodeVecI32x4ExtendHighI16x8UName -// wasm.OpcodeVecI64x2ExtendLowI32x4SName wasm.OpcodeVecI64x2ExtendHighI32x4SName -// wasm.OpcodeVecI64x2ExtendLowI32x4UName wasm.OpcodeVecI64x2ExtendHighI32x4UName -// -// originshape is the shape of the original lanes for extension which is -// either shapeI8x16, shapeI16x8, or shapeI32x4. -// useLow true if it uses the lower half of vector for extension. -func newOperationV128Extend(originshape shape, signed bool, useLow bool) unionOperation { - op := unionOperation{Kind: operationKindV128Extend} - op.B1 = originshape - if signed { - op.B2 = 1 - } - op.B3 = useLow - return op -} - -// NewOperationV128ExtMul is a constructor for unionOperation with operationKindV128ExtMul. 
-// -// This corresponds to -// -// wasm.OpcodeVecI16x8ExtMulLowI8x16SName wasm.OpcodeVecI16x8ExtMulLowI8x16UName -// wasm.OpcodeVecI16x8ExtMulHighI8x16SName wasm.OpcodeVecI16x8ExtMulHighI8x16UName -// wasm.OpcodeVecI32x4ExtMulLowI16x8SName wasm.OpcodeVecI32x4ExtMulLowI16x8UName -// wasm.OpcodeVecI32x4ExtMulHighI16x8SName wasm.OpcodeVecI32x4ExtMulHighI16x8UName -// wasm.OpcodeVecI64x2ExtMulLowI32x4SName wasm.OpcodeVecI64x2ExtMulLowI32x4UName -// wasm.OpcodeVecI64x2ExtMulHighI32x4SName wasm.OpcodeVecI64x2ExtMulHighI32x4UName. -// -// originshape is the shape of the original lanes for extension which is -// either shapeI8x16, shapeI16x8, or shapeI32x4. -// useLow true if it uses the lower half of vector for extension. -func newOperationV128ExtMul(originshape shape, signed bool, useLow bool) unionOperation { - op := unionOperation{Kind: operationKindV128ExtMul} - op.B1 = originshape - if signed { - op.B2 = 1 - } - op.B3 = useLow - return op -} - -// NewOperationV128Q15mulrSatS is a constructor for unionOperation with operationKindV128Q15mulrSatS. -// -// This corresponds to wasm.OpcodeVecI16x8Q15mulrSatSName -func newOperationV128Q15mulrSatS() unionOperation { - return unionOperation{Kind: operationKindV128Q15mulrSatS} -} - -// NewOperationV128ExtAddPairwise is a constructor for unionOperation with operationKindV128ExtAddPairwise. -// -// This corresponds to -// -// wasm.OpcodeVecI16x8ExtaddPairwiseI8x16SName wasm.OpcodeVecI16x8ExtaddPairwiseI8x16UName -// wasm.OpcodeVecI32x4ExtaddPairwiseI16x8SName wasm.OpcodeVecI32x4ExtaddPairwiseI16x8UName. -// -// originshape is the shape of the original lanes for extension which is -// either shapeI8x16, or shapeI16x8. -func newOperationV128ExtAddPairwise(originshape shape, signed bool) unionOperation { - return unionOperation{Kind: operationKindV128ExtAddPairwise, B1: originshape, B3: signed} -} - -// NewOperationV128FloatPromote is a constructor for unionOperation with NewOperationV128FloatPromote. -// -// This corresponds to wasm.OpcodeVecF64x2PromoteLowF32x4ZeroName -// This discards the higher 64-bit of a vector, and promotes two -// 32-bit floats in the lower 64-bit as two 64-bit floats. -func newOperationV128FloatPromote() unionOperation { - return unionOperation{Kind: operationKindV128FloatPromote} -} - -// NewOperationV128FloatDemote is a constructor for unionOperation with NewOperationV128FloatDemote. -// -// This corresponds to wasm.OpcodeVecF32x4DemoteF64x2ZeroName. -func newOperationV128FloatDemote() unionOperation { - return unionOperation{Kind: operationKindV128FloatDemote} -} - -// NewOperationV128FConvertFromI is a constructor for unionOperation with NewOperationV128FConvertFromI. -// -// This corresponds to -// -// wasm.OpcodeVecF32x4ConvertI32x4SName wasm.OpcodeVecF32x4ConvertI32x4UName -// wasm.OpcodeVecF64x2ConvertLowI32x4SName wasm.OpcodeVecF64x2ConvertLowI32x4UName. -// -// destinationshape is the shape of the destination lanes for conversion which is -// either shapeF32x4, or shapeF64x2. -func newOperationV128FConvertFromI(destinationshape shape, signed bool) unionOperation { - return unionOperation{Kind: operationKindV128FConvertFromI, B1: destinationshape, B3: signed} -} - -// NewOperationV128Dot is a constructor for unionOperation with operationKindV128Dot. -// -// This corresponds to wasm.OpcodeVecI32x4DotI16x8SName -func newOperationV128Dot() unionOperation { - return unionOperation{Kind: operationKindV128Dot} -} - -// NewOperationV128Narrow is a constructor for unionOperation with operationKindV128Narrow. 
-// -// This corresponds to -// -// wasm.OpcodeVecI8x16NarrowI16x8SName wasm.OpcodeVecI8x16NarrowI16x8UName -// wasm.OpcodeVecI16x8NarrowI32x4SName wasm.OpcodeVecI16x8NarrowI32x4UName. -// -// originshape is the shape of the original lanes for narrowing which is -// either shapeI16x8, or shapeI32x4. -func newOperationV128Narrow(originshape shape, signed bool) unionOperation { - return unionOperation{Kind: operationKindV128Narrow, B1: originshape, B3: signed} -} - -// NewOperationV128ITruncSatFromF is a constructor for unionOperation with operationKindV128ITruncSatFromF. -// -// This corresponds to -// -// wasm.OpcodeVecI32x4TruncSatF64x2UZeroName wasm.OpcodeVecI32x4TruncSatF64x2SZeroName -// wasm.OpcodeVecI32x4TruncSatF32x4UName wasm.OpcodeVecI32x4TruncSatF32x4SName. -// -// originshape is the shape of the original lanes for truncation which is -// either shapeF32x4, or shapeF64x2. -func newOperationV128ITruncSatFromF(originshape shape, signed bool) unionOperation { - return unionOperation{Kind: operationKindV128ITruncSatFromF, B1: originshape, B3: signed} -} - -// atomicArithmeticOp is the type for the operation kind of atomic arithmetic operations. -type atomicArithmeticOp byte - -const ( - // atomicArithmeticOpAdd is the kind for an add operation. - atomicArithmeticOpAdd atomicArithmeticOp = iota - // atomicArithmeticOpSub is the kind for a sub operation. - atomicArithmeticOpSub - // atomicArithmeticOpAnd is the kind for a bitwise and operation. - atomicArithmeticOpAnd - // atomicArithmeticOpOr is the kind for a bitwise or operation. - atomicArithmeticOpOr - // atomicArithmeticOpXor is the kind for a bitwise xor operation. - atomicArithmeticOpXor - // atomicArithmeticOpNop is the kind for a nop operation. - atomicArithmeticOpNop -) - -// NewOperationAtomicMemoryWait is a constructor for unionOperation with operationKindAtomicMemoryWait. -// -// This corresponds to -// -// wasm.OpcodeAtomicWait32Name wasm.OpcodeAtomicWait64Name -func newOperationAtomicMemoryWait(unsignedType unsignedType, arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindAtomicMemoryWait, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationAtomicMemoryNotify is a constructor for unionOperation with operationKindAtomicMemoryNotify. -// -// This corresponds to -// -// wasm.OpcodeAtomicNotifyName -func newOperationAtomicMemoryNotify(arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindAtomicMemoryNotify, U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationAtomicFence is a constructor for unionOperation with operationKindAtomicFence. -// -// This corresponds to -// -// wasm.OpcodeAtomicFenceName -func newOperationAtomicFence() unionOperation { - return unionOperation{Kind: operationKindAtomicFence} -} - -// NewOperationAtomicLoad is a constructor for unionOperation with operationKindAtomicLoad. -// -// This corresponds to -// -// wasm.OpcodeAtomicI32LoadName wasm.OpcodeAtomicI64LoadName -func newOperationAtomicLoad(unsignedType unsignedType, arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindAtomicLoad, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationAtomicLoad8 is a constructor for unionOperation with operationKindAtomicLoad8. 
-// -// This corresponds to -// -// wasm.OpcodeAtomicI32Load8UName wasm.OpcodeAtomicI64Load8UName -func newOperationAtomicLoad8(unsignedType unsignedType, arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindAtomicLoad8, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationAtomicLoad16 is a constructor for unionOperation with operationKindAtomicLoad16. -// -// This corresponds to -// -// wasm.OpcodeAtomicI32Load16UName wasm.OpcodeAtomicI64Load16UName -func newOperationAtomicLoad16(unsignedType unsignedType, arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindAtomicLoad16, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationAtomicStore is a constructor for unionOperation with operationKindAtomicStore. -// -// This corresponds to -// -// wasm.OpcodeAtomicI32StoreName wasm.OpcodeAtomicI64StoreName -func newOperationAtomicStore(unsignedType unsignedType, arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindAtomicStore, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationAtomicStore8 is a constructor for unionOperation with operationKindAtomicStore8. -// -// This corresponds to -// -// wasm.OpcodeAtomicI32Store8UName wasm.OpcodeAtomicI64Store8UName -func newOperationAtomicStore8(unsignedType unsignedType, arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindAtomicStore8, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationAtomicStore16 is a constructor for unionOperation with operationKindAtomicStore16. -// -// This corresponds to -// -// wasm.OpcodeAtomicI32Store16UName wasm.OpcodeAtomicI64Store16UName -func newOperationAtomicStore16(unsignedType unsignedType, arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindAtomicStore16, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationAtomicRMW is a constructor for unionOperation with operationKindAtomicRMW. -// -// This corresponds to -// -// wasm.OpcodeAtomicI32RMWAddName wasm.OpcodeAtomicI64RmwAddName -// wasm.OpcodeAtomicI32RMWSubName wasm.OpcodeAtomicI64RmwSubName -// wasm.OpcodeAtomicI32RMWAndName wasm.OpcodeAtomicI64RmwAndName -// wasm.OpcodeAtomicI32RMWOrName wasm.OpcodeAtomicI64RmwOrName -// wasm.OpcodeAtomicI32RMWXorName wasm.OpcodeAtomicI64RmwXorName -func newOperationAtomicRMW(unsignedType unsignedType, arg memoryArg, op atomicArithmeticOp) unionOperation { - return unionOperation{Kind: operationKindAtomicRMW, B1: byte(unsignedType), B2: byte(op), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationAtomicRMW8 is a constructor for unionOperation with operationKindAtomicRMW8. -// -// This corresponds to -// -// wasm.OpcodeAtomicI32RMW8AddUName wasm.OpcodeAtomicI64Rmw8AddUName -// wasm.OpcodeAtomicI32RMW8SubUName wasm.OpcodeAtomicI64Rmw8SubUName -// wasm.OpcodeAtomicI32RMW8AndUName wasm.OpcodeAtomicI64Rmw8AndUName -// wasm.OpcodeAtomicI32RMW8OrUName wasm.OpcodeAtomicI64Rmw8OrUName -// wasm.OpcodeAtomicI32RMW8XorUName wasm.OpcodeAtomicI64Rmw8XorUName -func newOperationAtomicRMW8(unsignedType unsignedType, arg memoryArg, op atomicArithmeticOp) unionOperation { - return unionOperation{Kind: operationKindAtomicRMW8, B1: byte(unsignedType), B2: byte(op), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationAtomicRMW16 is a constructor for unionOperation with operationKindAtomicRMW16. 
-// -// This corresponds to -// -// wasm.OpcodeAtomicI32RMW16AddUName wasm.OpcodeAtomicI64Rmw16AddUName -// wasm.OpcodeAtomicI32RMW16SubUName wasm.OpcodeAtomicI64Rmw16SubUName -// wasm.OpcodeAtomicI32RMW16AndUName wasm.OpcodeAtomicI64Rmw16AndUName -// wasm.OpcodeAtomicI32RMW16OrUName wasm.OpcodeAtomicI64Rmw16OrUName -// wasm.OpcodeAtomicI32RMW16XorUName wasm.OpcodeAtomicI64Rmw16XorUName -func newOperationAtomicRMW16(unsignedType unsignedType, arg memoryArg, op atomicArithmeticOp) unionOperation { - return unionOperation{Kind: operationKindAtomicRMW16, B1: byte(unsignedType), B2: byte(op), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationAtomicRMWCmpxchg is a constructor for unionOperation with operationKindAtomicRMWCmpxchg. -// -// This corresponds to -// -// wasm.OpcodeAtomicI32RMWCmpxchgName wasm.OpcodeAtomicI64RmwCmpxchgName -func newOperationAtomicRMWCmpxchg(unsignedType unsignedType, arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindAtomicRMWCmpxchg, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationAtomicRMW8Cmpxchg is a constructor for unionOperation with operationKindAtomicRMW8Cmpxchg. -// -// This corresponds to -// -// wasm.OpcodeAtomicI32RMW8CmpxchgUName wasm.OpcodeAtomicI64Rmw8CmpxchgUName -func newOperationAtomicRMW8Cmpxchg(unsignedType unsignedType, arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindAtomicRMW8Cmpxchg, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} - -// NewOperationAtomicRMW16Cmpxchg is a constructor for unionOperation with operationKindAtomicRMW16Cmpxchg. -// -// This corresponds to -// -// wasm.OpcodeAtomicI32RMW16CmpxchgUName wasm.OpcodeAtomicI64Rmw16CmpxchgUName -func newOperationAtomicRMW16Cmpxchg(unsignedType unsignedType, arg memoryArg) unionOperation { - return unionOperation{Kind: operationKindAtomicRMW16Cmpxchg, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/signature.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/signature.go deleted file mode 100644 index 7b9d5602d..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/signature.go +++ /dev/null @@ -1,767 +0,0 @@ -package interpreter - -import ( - "fmt" - - "github.com/tetratelabs/wazero/internal/wasm" -) - -// signature represents how a Wasm opcode -// manipulates the value stacks in terms of value types. 
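Given the in/out lists of such a signature, tracking the value stack while lowering is a matter of checking and popping the input types, then pushing the output types. The function below is an illustrative sketch of that bookkeeping, not the compiler's actual code; it assumes a plain []unsignedType stack, treats unsignedTypeUnknown as a wildcard (as drop and select need), and uses the standard "fmt" package.

    func applySignature(stack []unsignedType, sig *signature) ([]unsignedType, error) {
        if len(stack) < len(sig.in) {
            return nil, fmt.Errorf("stack underflow: need %d values, have %d", len(sig.in), len(stack))
        }
        base := len(stack) - len(sig.in)
        for i, want := range sig.in {
            got := stack[base+i]
            if got != want && want != unsignedTypeUnknown && got != unsignedTypeUnknown {
                return nil, fmt.Errorf("type mismatch at stack position %d", base+i)
            }
        }
        // Inputs consumed; outputs pushed in order.
        return append(stack[:base], sig.out...), nil
    }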
-type signature struct { - in, out []unsignedType -} - -var ( - signature_None_None = &signature{} - signature_Unknown_None = &signature{ - in: []unsignedType{unsignedTypeUnknown}, - } - signature_None_I32 = &signature{ - out: []unsignedType{unsignedTypeI32}, - } - signature_None_I64 = &signature{ - out: []unsignedType{unsignedTypeI64}, - } - signature_None_V128 = &signature{ - out: []unsignedType{unsignedTypeV128}, - } - signature_None_F32 = &signature{ - out: []unsignedType{unsignedTypeF32}, - } - signature_None_F64 = &signature{ - out: []unsignedType{unsignedTypeF64}, - } - signature_I32_None = &signature{ - in: []unsignedType{unsignedTypeI32}, - } - signature_I64_None = &signature{ - in: []unsignedType{unsignedTypeI64}, - } - signature_F32_None = &signature{ - in: []unsignedType{unsignedTypeF32}, - } - signature_F64_None = &signature{ - in: []unsignedType{unsignedTypeF64}, - } - signature_V128_None = &signature{ - in: []unsignedType{unsignedTypeV128}, - } - signature_I32_I32 = &signature{ - in: []unsignedType{unsignedTypeI32}, - out: []unsignedType{unsignedTypeI32}, - } - signature_I32_I64 = &signature{ - in: []unsignedType{unsignedTypeI32}, - out: []unsignedType{unsignedTypeI64}, - } - signature_I64_I64 = &signature{ - in: []unsignedType{unsignedTypeI64}, - out: []unsignedType{unsignedTypeI64}, - } - signature_I32_F32 = &signature{ - in: []unsignedType{unsignedTypeI32}, - out: []unsignedType{unsignedTypeF32}, - } - signature_I32_F64 = &signature{ - in: []unsignedType{unsignedTypeI32}, - out: []unsignedType{unsignedTypeF64}, - } - signature_I64_I32 = &signature{ - in: []unsignedType{unsignedTypeI64}, - out: []unsignedType{unsignedTypeI32}, - } - signature_I64_F32 = &signature{ - in: []unsignedType{unsignedTypeI64}, - out: []unsignedType{unsignedTypeF32}, - } - signature_I64_F64 = &signature{ - in: []unsignedType{unsignedTypeI64}, - out: []unsignedType{unsignedTypeF64}, - } - signature_F32_I32 = &signature{ - in: []unsignedType{unsignedTypeF32}, - out: []unsignedType{unsignedTypeI32}, - } - signature_F32_I64 = &signature{ - in: []unsignedType{unsignedTypeF32}, - out: []unsignedType{unsignedTypeI64}, - } - signature_F32_F64 = &signature{ - in: []unsignedType{unsignedTypeF32}, - out: []unsignedType{unsignedTypeF64}, - } - signature_F32_F32 = &signature{ - in: []unsignedType{unsignedTypeF32}, - out: []unsignedType{unsignedTypeF32}, - } - signature_F64_I32 = &signature{ - in: []unsignedType{unsignedTypeF64}, - out: []unsignedType{unsignedTypeI32}, - } - signature_F64_F32 = &signature{ - in: []unsignedType{unsignedTypeF64}, - out: []unsignedType{unsignedTypeF32}, - } - signature_F64_I64 = &signature{ - in: []unsignedType{unsignedTypeF64}, - out: []unsignedType{unsignedTypeI64}, - } - signature_F64_F64 = &signature{ - in: []unsignedType{unsignedTypeF64}, - out: []unsignedType{unsignedTypeF64}, - } - signature_I32I32_None = &signature{ - in: []unsignedType{unsignedTypeI32, unsignedTypeI32}, - } - - signature_I32I32_I32 = &signature{ - in: []unsignedType{unsignedTypeI32, unsignedTypeI32}, - out: []unsignedType{unsignedTypeI32}, - } - signature_I32I64_None = &signature{ - in: []unsignedType{unsignedTypeI32, unsignedTypeI64}, - } - signature_I32F32_None = &signature{ - in: []unsignedType{unsignedTypeI32, unsignedTypeF32}, - } - signature_I32F64_None = &signature{ - in: []unsignedType{unsignedTypeI32, unsignedTypeF64}, - } - signature_I64I32_I32 = &signature{ - in: []unsignedType{unsignedTypeI64, unsignedTypeI32}, - out: []unsignedType{unsignedTypeI32}, - } - signature_I64I64_I32 = &signature{ - 
in: []unsignedType{unsignedTypeI64, unsignedTypeI64}, - out: []unsignedType{unsignedTypeI32}, - } - signature_I64I64_I64 = &signature{ - in: []unsignedType{unsignedTypeI64, unsignedTypeI64}, - out: []unsignedType{unsignedTypeI64}, - } - signature_F32F32_I32 = &signature{ - in: []unsignedType{unsignedTypeF32, unsignedTypeF32}, - out: []unsignedType{unsignedTypeI32}, - } - signature_F32F32_F32 = &signature{ - in: []unsignedType{unsignedTypeF32, unsignedTypeF32}, - out: []unsignedType{unsignedTypeF32}, - } - signature_F64F64_I32 = &signature{ - in: []unsignedType{unsignedTypeF64, unsignedTypeF64}, - out: []unsignedType{unsignedTypeI32}, - } - signature_F64F64_F64 = &signature{ - in: []unsignedType{unsignedTypeF64, unsignedTypeF64}, - out: []unsignedType{unsignedTypeF64}, - } - signature_I32I32I32_None = &signature{ - in: []unsignedType{unsignedTypeI32, unsignedTypeI32, unsignedTypeI32}, - } - signature_I32I64I32_None = &signature{ - in: []unsignedType{unsignedTypeI32, unsignedTypeI64, unsignedTypeI32}, - } - signature_UnknownUnknownI32_Unknown = &signature{ - in: []unsignedType{unsignedTypeUnknown, unsignedTypeUnknown, unsignedTypeI32}, - out: []unsignedType{unsignedTypeUnknown}, - } - signature_V128V128_V128 = &signature{ - in: []unsignedType{unsignedTypeV128, unsignedTypeV128}, - out: []unsignedType{unsignedTypeV128}, - } - signature_V128V128V128_V32 = &signature{ - in: []unsignedType{unsignedTypeV128, unsignedTypeV128, unsignedTypeV128}, - out: []unsignedType{unsignedTypeV128}, - } - signature_I32_V128 = &signature{ - in: []unsignedType{unsignedTypeI32}, - out: []unsignedType{unsignedTypeV128}, - } - signature_I32V128_None = &signature{ - in: []unsignedType{unsignedTypeI32, unsignedTypeV128}, - } - signature_I32V128_V128 = &signature{ - in: []unsignedType{unsignedTypeI32, unsignedTypeV128}, - out: []unsignedType{unsignedTypeV128}, - } - signature_V128I32_V128 = &signature{ - in: []unsignedType{unsignedTypeV128, unsignedTypeI32}, - out: []unsignedType{unsignedTypeV128}, - } - signature_V128I64_V128 = &signature{ - in: []unsignedType{unsignedTypeV128, unsignedTypeI64}, - out: []unsignedType{unsignedTypeV128}, - } - signature_V128F32_V128 = &signature{ - in: []unsignedType{unsignedTypeV128, unsignedTypeF32}, - out: []unsignedType{unsignedTypeV128}, - } - signature_V128F64_V128 = &signature{ - in: []unsignedType{unsignedTypeV128, unsignedTypeF64}, - out: []unsignedType{unsignedTypeV128}, - } - signature_V128_I32 = &signature{ - in: []unsignedType{unsignedTypeV128}, - out: []unsignedType{unsignedTypeI32}, - } - signature_V128_I64 = &signature{ - in: []unsignedType{unsignedTypeV128}, - out: []unsignedType{unsignedTypeI64}, - } - signature_V128_F32 = &signature{ - in: []unsignedType{unsignedTypeV128}, - out: []unsignedType{unsignedTypeF32}, - } - signature_V128_F64 = &signature{ - in: []unsignedType{unsignedTypeV128}, - out: []unsignedType{unsignedTypeF64}, - } - signature_V128_V128 = &signature{ - in: []unsignedType{unsignedTypeV128}, - out: []unsignedType{unsignedTypeV128}, - } - signature_I64_V128 = &signature{ - in: []unsignedType{unsignedTypeI64}, - out: []unsignedType{unsignedTypeV128}, - } - signature_F32_V128 = &signature{ - in: []unsignedType{unsignedTypeF32}, - out: []unsignedType{unsignedTypeV128}, - } - signature_F64_V128 = &signature{ - in: []unsignedType{unsignedTypeF64}, - out: []unsignedType{unsignedTypeV128}, - } - signature_I32I64_I64 = &signature{ - in: []unsignedType{unsignedTypeI32, unsignedTypeI64}, - out: []unsignedType{unsignedTypeI64}, - } - signature_I32I32I64_I32 = 
&signature{ - in: []unsignedType{unsignedTypeI32, unsignedTypeI32, unsignedTypeI64}, - out: []unsignedType{unsignedTypeI32}, - } - signature_I32I64I64_I32 = &signature{ - in: []unsignedType{unsignedTypeI32, unsignedTypeI64, unsignedTypeI64}, - out: []unsignedType{unsignedTypeI32}, - } - signature_I32I32I32_I32 = &signature{ - in: []unsignedType{unsignedTypeI32, unsignedTypeI32, unsignedTypeI32}, - out: []unsignedType{unsignedTypeI32}, - } - signature_I32I64I64_I64 = &signature{ - in: []unsignedType{unsignedTypeI32, unsignedTypeI64, unsignedTypeI64}, - out: []unsignedType{unsignedTypeI64}, - } -) - -// wasmOpcodeSignature returns the signature of given Wasm opcode. -// Note that some of opcodes' signature vary depending on -// the function instance (for example, local types). -// "index" parameter is not used by most of opcodes. -// The returned signature is used for stack validation when lowering Wasm's opcodes to interpreterir. -func (c *compiler) wasmOpcodeSignature(op wasm.Opcode, index uint32) (*signature, error) { - switch op { - case wasm.OpcodeUnreachable, wasm.OpcodeNop, wasm.OpcodeBlock, wasm.OpcodeLoop: - return signature_None_None, nil - case wasm.OpcodeIf: - return signature_I32_None, nil - case wasm.OpcodeElse, wasm.OpcodeEnd, wasm.OpcodeBr: - return signature_None_None, nil - case wasm.OpcodeBrIf, wasm.OpcodeBrTable: - return signature_I32_None, nil - case wasm.OpcodeReturn: - return signature_None_None, nil - case wasm.OpcodeCall: - return c.funcTypeToSigs.get(c.funcs[index], false /* direct */), nil - case wasm.OpcodeCallIndirect: - return c.funcTypeToSigs.get(index, true /* call_indirect */), nil - case wasm.OpcodeDrop: - return signature_Unknown_None, nil - case wasm.OpcodeSelect, wasm.OpcodeTypedSelect: - return signature_UnknownUnknownI32_Unknown, nil - case wasm.OpcodeLocalGet: - inputLen := uint32(len(c.sig.Params)) - if l := uint32(len(c.localTypes)) + inputLen; index >= l { - return nil, fmt.Errorf("invalid local index for local.get %d >= %d", index, l) - } - var t wasm.ValueType - if index < inputLen { - t = c.sig.Params[index] - } else { - t = c.localTypes[index-inputLen] - } - return wasmValueTypeToUnsignedOutSignature(t), nil - case wasm.OpcodeLocalSet: - inputLen := uint32(len(c.sig.Params)) - if l := uint32(len(c.localTypes)) + inputLen; index >= l { - return nil, fmt.Errorf("invalid local index for local.get %d >= %d", index, l) - } - var t wasm.ValueType - if index < inputLen { - t = c.sig.Params[index] - } else { - t = c.localTypes[index-inputLen] - } - return wasmValueTypeToUnsignedInSignature(t), nil - case wasm.OpcodeLocalTee: - inputLen := uint32(len(c.sig.Params)) - if l := uint32(len(c.localTypes)) + inputLen; index >= l { - return nil, fmt.Errorf("invalid local index for local.get %d >= %d", index, l) - } - var t wasm.ValueType - if index < inputLen { - t = c.sig.Params[index] - } else { - t = c.localTypes[index-inputLen] - } - return wasmValueTypeToUnsignedInOutSignature(t), nil - case wasm.OpcodeGlobalGet: - if len(c.globals) <= int(index) { - return nil, fmt.Errorf("invalid global index for global.get %d >= %d", index, len(c.globals)) - } - return wasmValueTypeToUnsignedOutSignature(c.globals[index].ValType), nil - case wasm.OpcodeGlobalSet: - if len(c.globals) <= int(index) { - return nil, fmt.Errorf("invalid global index for global.get %d >= %d", index, len(c.globals)) - } - return wasmValueTypeToUnsignedInSignature(c.globals[index].ValType), nil - case wasm.OpcodeI32Load: - return signature_I32_I32, nil - case wasm.OpcodeI64Load: - return 
signature_I32_I64, nil - case wasm.OpcodeF32Load: - return signature_I32_F32, nil - case wasm.OpcodeF64Load: - return signature_I32_F64, nil - case wasm.OpcodeI32Load8S, wasm.OpcodeI32Load8U, wasm.OpcodeI32Load16S, wasm.OpcodeI32Load16U: - return signature_I32_I32, nil - case wasm.OpcodeI64Load8S, wasm.OpcodeI64Load8U, wasm.OpcodeI64Load16S, wasm.OpcodeI64Load16U, - wasm.OpcodeI64Load32S, wasm.OpcodeI64Load32U: - return signature_I32_I64, nil - case wasm.OpcodeI32Store: - return signature_I32I32_None, nil - case wasm.OpcodeI64Store: - return signature_I32I64_None, nil - case wasm.OpcodeF32Store: - return signature_I32F32_None, nil - case wasm.OpcodeF64Store: - return signature_I32F64_None, nil - case wasm.OpcodeI32Store8: - return signature_I32I32_None, nil - case wasm.OpcodeI32Store16: - return signature_I32I32_None, nil - case wasm.OpcodeI64Store8: - return signature_I32I64_None, nil - case wasm.OpcodeI64Store16: - return signature_I32I64_None, nil - case wasm.OpcodeI64Store32: - return signature_I32I64_None, nil - case wasm.OpcodeMemorySize: - return signature_None_I32, nil - case wasm.OpcodeMemoryGrow: - return signature_I32_I32, nil - case wasm.OpcodeI32Const: - return signature_None_I32, nil - case wasm.OpcodeI64Const: - return signature_None_I64, nil - case wasm.OpcodeF32Const: - return signature_None_F32, nil - case wasm.OpcodeF64Const: - return signature_None_F64, nil - case wasm.OpcodeI32Eqz: - return signature_I32_I32, nil - case wasm.OpcodeI32Eq, wasm.OpcodeI32Ne, wasm.OpcodeI32LtS, - wasm.OpcodeI32LtU, wasm.OpcodeI32GtS, wasm.OpcodeI32GtU, - wasm.OpcodeI32LeS, wasm.OpcodeI32LeU, wasm.OpcodeI32GeS, - wasm.OpcodeI32GeU: - return signature_I32I32_I32, nil - case wasm.OpcodeI64Eqz: - return signature_I64_I32, nil - case wasm.OpcodeI64Eq, wasm.OpcodeI64Ne, wasm.OpcodeI64LtS, - wasm.OpcodeI64LtU, wasm.OpcodeI64GtS, wasm.OpcodeI64GtU, - wasm.OpcodeI64LeS, wasm.OpcodeI64LeU, wasm.OpcodeI64GeS, - wasm.OpcodeI64GeU: - return signature_I64I64_I32, nil - case wasm.OpcodeF32Eq, wasm.OpcodeF32Ne, wasm.OpcodeF32Lt, - wasm.OpcodeF32Gt, wasm.OpcodeF32Le, wasm.OpcodeF32Ge: - return signature_F32F32_I32, nil - case wasm.OpcodeF64Eq, wasm.OpcodeF64Ne, wasm.OpcodeF64Lt, - wasm.OpcodeF64Gt, wasm.OpcodeF64Le, wasm.OpcodeF64Ge: - return signature_F64F64_I32, nil - case wasm.OpcodeI32Clz, wasm.OpcodeI32Ctz, wasm.OpcodeI32Popcnt: - return signature_I32_I32, nil - case wasm.OpcodeI32Add, wasm.OpcodeI32Sub, wasm.OpcodeI32Mul, - wasm.OpcodeI32DivS, wasm.OpcodeI32DivU, wasm.OpcodeI32RemS, - wasm.OpcodeI32RemU, wasm.OpcodeI32And, wasm.OpcodeI32Or, - wasm.OpcodeI32Xor, wasm.OpcodeI32Shl, wasm.OpcodeI32ShrS, - wasm.OpcodeI32ShrU, wasm.OpcodeI32Rotl, wasm.OpcodeI32Rotr: - return signature_I32I32_I32, nil - case wasm.OpcodeI64Clz, wasm.OpcodeI64Ctz, wasm.OpcodeI64Popcnt: - return signature_I64_I64, nil - case wasm.OpcodeI64Add, wasm.OpcodeI64Sub, wasm.OpcodeI64Mul, - wasm.OpcodeI64DivS, wasm.OpcodeI64DivU, wasm.OpcodeI64RemS, - wasm.OpcodeI64RemU, wasm.OpcodeI64And, wasm.OpcodeI64Or, - wasm.OpcodeI64Xor, wasm.OpcodeI64Shl, wasm.OpcodeI64ShrS, - wasm.OpcodeI64ShrU, wasm.OpcodeI64Rotl, wasm.OpcodeI64Rotr: - return signature_I64I64_I64, nil - case wasm.OpcodeF32Abs, wasm.OpcodeF32Neg, wasm.OpcodeF32Ceil, - wasm.OpcodeF32Floor, wasm.OpcodeF32Trunc, wasm.OpcodeF32Nearest, - wasm.OpcodeF32Sqrt: - return signature_F32_F32, nil - case wasm.OpcodeF32Add, wasm.OpcodeF32Sub, wasm.OpcodeF32Mul, - wasm.OpcodeF32Div, wasm.OpcodeF32Min, wasm.OpcodeF32Max, - wasm.OpcodeF32Copysign: - return signature_F32F32_F32, nil - case 
wasm.OpcodeF64Abs, wasm.OpcodeF64Neg, wasm.OpcodeF64Ceil, - wasm.OpcodeF64Floor, wasm.OpcodeF64Trunc, wasm.OpcodeF64Nearest, - wasm.OpcodeF64Sqrt: - return signature_F64_F64, nil - case wasm.OpcodeF64Add, wasm.OpcodeF64Sub, wasm.OpcodeF64Mul, - wasm.OpcodeF64Div, wasm.OpcodeF64Min, wasm.OpcodeF64Max, - wasm.OpcodeF64Copysign: - return signature_F64F64_F64, nil - case wasm.OpcodeI32WrapI64: - return signature_I64_I32, nil - case wasm.OpcodeI32TruncF32S, wasm.OpcodeI32TruncF32U: - return signature_F32_I32, nil - case wasm.OpcodeI32TruncF64S, wasm.OpcodeI32TruncF64U: - return signature_F64_I32, nil - case wasm.OpcodeI64ExtendI32S, wasm.OpcodeI64ExtendI32U: - return signature_I32_I64, nil - case wasm.OpcodeI64TruncF32S, wasm.OpcodeI64TruncF32U: - return signature_F32_I64, nil - case wasm.OpcodeI64TruncF64S, wasm.OpcodeI64TruncF64U: - return signature_F64_I64, nil - case wasm.OpcodeF32ConvertI32S, wasm.OpcodeF32ConvertI32U: - return signature_I32_F32, nil - case wasm.OpcodeF32ConvertI64S, wasm.OpcodeF32ConvertI64U: - return signature_I64_F32, nil - case wasm.OpcodeF32DemoteF64: - return signature_F64_F32, nil - case wasm.OpcodeF64ConvertI32S, wasm.OpcodeF64ConvertI32U: - return signature_I32_F64, nil - case wasm.OpcodeF64ConvertI64S, wasm.OpcodeF64ConvertI64U: - return signature_I64_F64, nil - case wasm.OpcodeF64PromoteF32: - return signature_F32_F64, nil - case wasm.OpcodeI32ReinterpretF32: - return signature_F32_I32, nil - case wasm.OpcodeI64ReinterpretF64: - return signature_F64_I64, nil - case wasm.OpcodeF32ReinterpretI32: - return signature_I32_F32, nil - case wasm.OpcodeF64ReinterpretI64: - return signature_I64_F64, nil - case wasm.OpcodeI32Extend8S, wasm.OpcodeI32Extend16S: - return signature_I32_I32, nil - case wasm.OpcodeI64Extend8S, wasm.OpcodeI64Extend16S, wasm.OpcodeI64Extend32S: - return signature_I64_I64, nil - case wasm.OpcodeTableGet: - // table.get takes table's offset and pushes the ref type value of opaque pointer as i64 value onto the stack. - return signature_I32_I64, nil - case wasm.OpcodeTableSet: - // table.set takes table's offset and the ref type value of opaque pointer as i64 value. - return signature_I32I64_None, nil - case wasm.OpcodeRefFunc: - // ref.func is translated as pushing the compiled function's opaque pointer (uint64) at interpreterir layer. - return signature_None_I64, nil - case wasm.OpcodeRefIsNull: - // ref.is_null is translated as checking if the uint64 on the top of the stack (opaque pointer) is zero or not. - return signature_I64_I32, nil - case wasm.OpcodeRefNull: - // ref.null is translated as i64.const 0. 
- return signature_None_I64, nil - case wasm.OpcodeMiscPrefix: - switch miscOp := c.body[c.pc+1]; miscOp { - case wasm.OpcodeMiscI32TruncSatF32S, wasm.OpcodeMiscI32TruncSatF32U: - return signature_F32_I32, nil - case wasm.OpcodeMiscI32TruncSatF64S, wasm.OpcodeMiscI32TruncSatF64U: - return signature_F64_I32, nil - case wasm.OpcodeMiscI64TruncSatF32S, wasm.OpcodeMiscI64TruncSatF32U: - return signature_F32_I64, nil - case wasm.OpcodeMiscI64TruncSatF64S, wasm.OpcodeMiscI64TruncSatF64U: - return signature_F64_I64, nil - case wasm.OpcodeMiscMemoryInit, wasm.OpcodeMiscMemoryCopy, wasm.OpcodeMiscMemoryFill, - wasm.OpcodeMiscTableInit, wasm.OpcodeMiscTableCopy: - return signature_I32I32I32_None, nil - case wasm.OpcodeMiscDataDrop, wasm.OpcodeMiscElemDrop: - return signature_None_None, nil - case wasm.OpcodeMiscTableGrow: - return signature_I64I32_I32, nil - case wasm.OpcodeMiscTableSize: - return signature_None_I32, nil - case wasm.OpcodeMiscTableFill: - return signature_I32I64I32_None, nil - default: - return nil, fmt.Errorf("unsupported misc instruction in interpreterir: 0x%x", op) - } - case wasm.OpcodeVecPrefix: - switch vecOp := c.body[c.pc+1]; vecOp { - case wasm.OpcodeVecV128Const: - return signature_None_V128, nil - case wasm.OpcodeVecV128Load, wasm.OpcodeVecV128Load8x8s, wasm.OpcodeVecV128Load8x8u, - wasm.OpcodeVecV128Load16x4s, wasm.OpcodeVecV128Load16x4u, wasm.OpcodeVecV128Load32x2s, - wasm.OpcodeVecV128Load32x2u, wasm.OpcodeVecV128Load8Splat, wasm.OpcodeVecV128Load16Splat, - wasm.OpcodeVecV128Load32Splat, wasm.OpcodeVecV128Load64Splat, wasm.OpcodeVecV128Load32zero, - wasm.OpcodeVecV128Load64zero: - return signature_I32_V128, nil - case wasm.OpcodeVecV128Load8Lane, wasm.OpcodeVecV128Load16Lane, - wasm.OpcodeVecV128Load32Lane, wasm.OpcodeVecV128Load64Lane: - return signature_I32V128_V128, nil - case wasm.OpcodeVecV128Store, - wasm.OpcodeVecV128Store8Lane, - wasm.OpcodeVecV128Store16Lane, - wasm.OpcodeVecV128Store32Lane, - wasm.OpcodeVecV128Store64Lane: - return signature_I32V128_None, nil - case wasm.OpcodeVecI8x16ExtractLaneS, - wasm.OpcodeVecI8x16ExtractLaneU, - wasm.OpcodeVecI16x8ExtractLaneS, - wasm.OpcodeVecI16x8ExtractLaneU, - wasm.OpcodeVecI32x4ExtractLane: - return signature_V128_I32, nil - case wasm.OpcodeVecI64x2ExtractLane: - return signature_V128_I64, nil - case wasm.OpcodeVecF32x4ExtractLane: - return signature_V128_F32, nil - case wasm.OpcodeVecF64x2ExtractLane: - return signature_V128_F64, nil - case wasm.OpcodeVecI8x16ReplaceLane, wasm.OpcodeVecI16x8ReplaceLane, wasm.OpcodeVecI32x4ReplaceLane, - wasm.OpcodeVecI8x16Shl, wasm.OpcodeVecI8x16ShrS, wasm.OpcodeVecI8x16ShrU, - wasm.OpcodeVecI16x8Shl, wasm.OpcodeVecI16x8ShrS, wasm.OpcodeVecI16x8ShrU, - wasm.OpcodeVecI32x4Shl, wasm.OpcodeVecI32x4ShrS, wasm.OpcodeVecI32x4ShrU, - wasm.OpcodeVecI64x2Shl, wasm.OpcodeVecI64x2ShrS, wasm.OpcodeVecI64x2ShrU: - return signature_V128I32_V128, nil - case wasm.OpcodeVecI64x2ReplaceLane: - return signature_V128I64_V128, nil - case wasm.OpcodeVecF32x4ReplaceLane: - return signature_V128F32_V128, nil - case wasm.OpcodeVecF64x2ReplaceLane: - return signature_V128F64_V128, nil - case wasm.OpcodeVecI8x16Splat, - wasm.OpcodeVecI16x8Splat, - wasm.OpcodeVecI32x4Splat: - return signature_I32_V128, nil - case wasm.OpcodeVecI64x2Splat: - return signature_I64_V128, nil - case wasm.OpcodeVecF32x4Splat: - return signature_F32_V128, nil - case wasm.OpcodeVecF64x2Splat: - return signature_F64_V128, nil - case wasm.OpcodeVecV128i8x16Shuffle, wasm.OpcodeVecI8x16Swizzle, wasm.OpcodeVecV128And, 
wasm.OpcodeVecV128Or, wasm.OpcodeVecV128Xor, wasm.OpcodeVecV128AndNot: - return signature_V128V128_V128, nil - case wasm.OpcodeVecI8x16AllTrue, wasm.OpcodeVecI16x8AllTrue, wasm.OpcodeVecI32x4AllTrue, wasm.OpcodeVecI64x2AllTrue, - wasm.OpcodeVecV128AnyTrue, - wasm.OpcodeVecI8x16BitMask, wasm.OpcodeVecI16x8BitMask, wasm.OpcodeVecI32x4BitMask, wasm.OpcodeVecI64x2BitMask: - return signature_V128_I32, nil - case wasm.OpcodeVecV128Not, wasm.OpcodeVecI8x16Neg, wasm.OpcodeVecI16x8Neg, wasm.OpcodeVecI32x4Neg, wasm.OpcodeVecI64x2Neg, - wasm.OpcodeVecF32x4Neg, wasm.OpcodeVecF64x2Neg, wasm.OpcodeVecF32x4Sqrt, wasm.OpcodeVecF64x2Sqrt, - wasm.OpcodeVecI8x16Abs, wasm.OpcodeVecI8x16Popcnt, wasm.OpcodeVecI16x8Abs, wasm.OpcodeVecI32x4Abs, wasm.OpcodeVecI64x2Abs, - wasm.OpcodeVecF32x4Abs, wasm.OpcodeVecF64x2Abs, - wasm.OpcodeVecF32x4Ceil, wasm.OpcodeVecF32x4Floor, wasm.OpcodeVecF32x4Trunc, wasm.OpcodeVecF32x4Nearest, - wasm.OpcodeVecF64x2Ceil, wasm.OpcodeVecF64x2Floor, wasm.OpcodeVecF64x2Trunc, wasm.OpcodeVecF64x2Nearest, - wasm.OpcodeVecI16x8ExtendLowI8x16S, wasm.OpcodeVecI16x8ExtendHighI8x16S, wasm.OpcodeVecI16x8ExtendLowI8x16U, wasm.OpcodeVecI16x8ExtendHighI8x16U, - wasm.OpcodeVecI32x4ExtendLowI16x8S, wasm.OpcodeVecI32x4ExtendHighI16x8S, wasm.OpcodeVecI32x4ExtendLowI16x8U, wasm.OpcodeVecI32x4ExtendHighI16x8U, - wasm.OpcodeVecI64x2ExtendLowI32x4S, wasm.OpcodeVecI64x2ExtendHighI32x4S, wasm.OpcodeVecI64x2ExtendLowI32x4U, wasm.OpcodeVecI64x2ExtendHighI32x4U, - wasm.OpcodeVecI16x8ExtaddPairwiseI8x16S, wasm.OpcodeVecI16x8ExtaddPairwiseI8x16U, wasm.OpcodeVecI32x4ExtaddPairwiseI16x8S, wasm.OpcodeVecI32x4ExtaddPairwiseI16x8U, - wasm.OpcodeVecF64x2PromoteLowF32x4Zero, wasm.OpcodeVecF32x4DemoteF64x2Zero, - wasm.OpcodeVecF32x4ConvertI32x4S, wasm.OpcodeVecF32x4ConvertI32x4U, - wasm.OpcodeVecF64x2ConvertLowI32x4S, wasm.OpcodeVecF64x2ConvertLowI32x4U, - wasm.OpcodeVecI32x4TruncSatF32x4S, wasm.OpcodeVecI32x4TruncSatF32x4U, - wasm.OpcodeVecI32x4TruncSatF64x2SZero, wasm.OpcodeVecI32x4TruncSatF64x2UZero: - return signature_V128_V128, nil - case wasm.OpcodeVecV128Bitselect: - return signature_V128V128V128_V32, nil - case wasm.OpcodeVecI8x16Eq, wasm.OpcodeVecI8x16Ne, wasm.OpcodeVecI8x16LtS, wasm.OpcodeVecI8x16LtU, wasm.OpcodeVecI8x16GtS, - wasm.OpcodeVecI8x16GtU, wasm.OpcodeVecI8x16LeS, wasm.OpcodeVecI8x16LeU, wasm.OpcodeVecI8x16GeS, wasm.OpcodeVecI8x16GeU, - wasm.OpcodeVecI16x8Eq, wasm.OpcodeVecI16x8Ne, wasm.OpcodeVecI16x8LtS, wasm.OpcodeVecI16x8LtU, wasm.OpcodeVecI16x8GtS, - wasm.OpcodeVecI16x8GtU, wasm.OpcodeVecI16x8LeS, wasm.OpcodeVecI16x8LeU, wasm.OpcodeVecI16x8GeS, wasm.OpcodeVecI16x8GeU, - wasm.OpcodeVecI32x4Eq, wasm.OpcodeVecI32x4Ne, wasm.OpcodeVecI32x4LtS, wasm.OpcodeVecI32x4LtU, wasm.OpcodeVecI32x4GtS, - wasm.OpcodeVecI32x4GtU, wasm.OpcodeVecI32x4LeS, wasm.OpcodeVecI32x4LeU, wasm.OpcodeVecI32x4GeS, wasm.OpcodeVecI32x4GeU, - wasm.OpcodeVecI64x2Eq, wasm.OpcodeVecI64x2Ne, wasm.OpcodeVecI64x2LtS, wasm.OpcodeVecI64x2GtS, wasm.OpcodeVecI64x2LeS, - wasm.OpcodeVecI64x2GeS, wasm.OpcodeVecF32x4Eq, wasm.OpcodeVecF32x4Ne, wasm.OpcodeVecF32x4Lt, wasm.OpcodeVecF32x4Gt, - wasm.OpcodeVecF32x4Le, wasm.OpcodeVecF32x4Ge, wasm.OpcodeVecF64x2Eq, wasm.OpcodeVecF64x2Ne, wasm.OpcodeVecF64x2Lt, - wasm.OpcodeVecF64x2Gt, wasm.OpcodeVecF64x2Le, wasm.OpcodeVecF64x2Ge, - wasm.OpcodeVecI8x16Add, wasm.OpcodeVecI8x16AddSatS, wasm.OpcodeVecI8x16AddSatU, wasm.OpcodeVecI8x16Sub, - wasm.OpcodeVecI8x16SubSatS, wasm.OpcodeVecI8x16SubSatU, - wasm.OpcodeVecI16x8Add, wasm.OpcodeVecI16x8AddSatS, wasm.OpcodeVecI16x8AddSatU, wasm.OpcodeVecI16x8Sub, - 
wasm.OpcodeVecI16x8SubSatS, wasm.OpcodeVecI16x8SubSatU, wasm.OpcodeVecI16x8Mul, - wasm.OpcodeVecI32x4Add, wasm.OpcodeVecI32x4Sub, wasm.OpcodeVecI32x4Mul, - wasm.OpcodeVecI64x2Add, wasm.OpcodeVecI64x2Sub, wasm.OpcodeVecI64x2Mul, - wasm.OpcodeVecF32x4Add, wasm.OpcodeVecF32x4Sub, wasm.OpcodeVecF32x4Mul, wasm.OpcodeVecF32x4Div, - wasm.OpcodeVecF64x2Add, wasm.OpcodeVecF64x2Sub, wasm.OpcodeVecF64x2Mul, wasm.OpcodeVecF64x2Div, - wasm.OpcodeVecI8x16MinS, wasm.OpcodeVecI8x16MinU, wasm.OpcodeVecI8x16MaxS, wasm.OpcodeVecI8x16MaxU, wasm.OpcodeVecI8x16AvgrU, - wasm.OpcodeVecI16x8MinS, wasm.OpcodeVecI16x8MinU, wasm.OpcodeVecI16x8MaxS, wasm.OpcodeVecI16x8MaxU, wasm.OpcodeVecI16x8AvgrU, - wasm.OpcodeVecI32x4MinS, wasm.OpcodeVecI32x4MinU, wasm.OpcodeVecI32x4MaxS, wasm.OpcodeVecI32x4MaxU, - wasm.OpcodeVecF32x4Min, wasm.OpcodeVecF32x4Max, wasm.OpcodeVecF64x2Min, wasm.OpcodeVecF64x2Max, - wasm.OpcodeVecF32x4Pmin, wasm.OpcodeVecF32x4Pmax, wasm.OpcodeVecF64x2Pmin, wasm.OpcodeVecF64x2Pmax, - wasm.OpcodeVecI16x8Q15mulrSatS, - wasm.OpcodeVecI16x8ExtMulLowI8x16S, wasm.OpcodeVecI16x8ExtMulHighI8x16S, wasm.OpcodeVecI16x8ExtMulLowI8x16U, wasm.OpcodeVecI16x8ExtMulHighI8x16U, - wasm.OpcodeVecI32x4ExtMulLowI16x8S, wasm.OpcodeVecI32x4ExtMulHighI16x8S, wasm.OpcodeVecI32x4ExtMulLowI16x8U, wasm.OpcodeVecI32x4ExtMulHighI16x8U, - wasm.OpcodeVecI64x2ExtMulLowI32x4S, wasm.OpcodeVecI64x2ExtMulHighI32x4S, wasm.OpcodeVecI64x2ExtMulLowI32x4U, wasm.OpcodeVecI64x2ExtMulHighI32x4U, - wasm.OpcodeVecI32x4DotI16x8S, - wasm.OpcodeVecI8x16NarrowI16x8S, wasm.OpcodeVecI8x16NarrowI16x8U, wasm.OpcodeVecI16x8NarrowI32x4S, wasm.OpcodeVecI16x8NarrowI32x4U: - return signature_V128V128_V128, nil - default: - return nil, fmt.Errorf("unsupported vector instruction in interpreterir: %s", wasm.VectorInstructionName(vecOp)) - } - case wasm.OpcodeAtomicPrefix: - switch atomicOp := c.body[c.pc+1]; atomicOp { - case wasm.OpcodeAtomicMemoryNotify: - return signature_I32I32_I32, nil - case wasm.OpcodeAtomicMemoryWait32: - return signature_I32I32I64_I32, nil - case wasm.OpcodeAtomicMemoryWait64: - return signature_I32I64I64_I32, nil - case wasm.OpcodeAtomicFence: - return signature_None_None, nil - case wasm.OpcodeAtomicI32Load, wasm.OpcodeAtomicI32Load8U, wasm.OpcodeAtomicI32Load16U: - return signature_I32_I32, nil - case wasm.OpcodeAtomicI64Load, wasm.OpcodeAtomicI64Load8U, wasm.OpcodeAtomicI64Load16U, wasm.OpcodeAtomicI64Load32U: - return signature_I32_I64, nil - case wasm.OpcodeAtomicI32Store, wasm.OpcodeAtomicI32Store8, wasm.OpcodeAtomicI32Store16: - return signature_I32I32_None, nil - case wasm.OpcodeAtomicI64Store, wasm.OpcodeAtomicI64Store8, wasm.OpcodeAtomicI64Store16, wasm.OpcodeAtomicI64Store32: - return signature_I32I64_None, nil - case wasm.OpcodeAtomicI32RmwAdd, wasm.OpcodeAtomicI32RmwSub, wasm.OpcodeAtomicI32RmwAnd, wasm.OpcodeAtomicI32RmwOr, wasm.OpcodeAtomicI32RmwXor, wasm.OpcodeAtomicI32RmwXchg, - wasm.OpcodeAtomicI32Rmw8AddU, wasm.OpcodeAtomicI32Rmw8SubU, wasm.OpcodeAtomicI32Rmw8AndU, wasm.OpcodeAtomicI32Rmw8OrU, wasm.OpcodeAtomicI32Rmw8XorU, wasm.OpcodeAtomicI32Rmw8XchgU, - wasm.OpcodeAtomicI32Rmw16AddU, wasm.OpcodeAtomicI32Rmw16SubU, wasm.OpcodeAtomicI32Rmw16AndU, wasm.OpcodeAtomicI32Rmw16OrU, wasm.OpcodeAtomicI32Rmw16XorU, wasm.OpcodeAtomicI32Rmw16XchgU: - return signature_I32I32_I32, nil - case wasm.OpcodeAtomicI64RmwAdd, wasm.OpcodeAtomicI64RmwSub, wasm.OpcodeAtomicI64RmwAnd, wasm.OpcodeAtomicI64RmwOr, wasm.OpcodeAtomicI64RmwXor, wasm.OpcodeAtomicI64RmwXchg, - wasm.OpcodeAtomicI64Rmw8AddU, wasm.OpcodeAtomicI64Rmw8SubU, 
wasm.OpcodeAtomicI64Rmw8AndU, wasm.OpcodeAtomicI64Rmw8OrU, wasm.OpcodeAtomicI64Rmw8XorU, wasm.OpcodeAtomicI64Rmw8XchgU, - wasm.OpcodeAtomicI64Rmw16AddU, wasm.OpcodeAtomicI64Rmw16SubU, wasm.OpcodeAtomicI64Rmw16AndU, wasm.OpcodeAtomicI64Rmw16OrU, wasm.OpcodeAtomicI64Rmw16XorU, wasm.OpcodeAtomicI64Rmw16XchgU, - wasm.OpcodeAtomicI64Rmw32AddU, wasm.OpcodeAtomicI64Rmw32SubU, wasm.OpcodeAtomicI64Rmw32AndU, wasm.OpcodeAtomicI64Rmw32OrU, wasm.OpcodeAtomicI64Rmw32XorU, wasm.OpcodeAtomicI64Rmw32XchgU: - return signature_I32I64_I64, nil - case wasm.OpcodeAtomicI32RmwCmpxchg, wasm.OpcodeAtomicI32Rmw8CmpxchgU, wasm.OpcodeAtomicI32Rmw16CmpxchgU: - return signature_I32I32I32_I32, nil - case wasm.OpcodeAtomicI64RmwCmpxchg, wasm.OpcodeAtomicI64Rmw8CmpxchgU, wasm.OpcodeAtomicI64Rmw16CmpxchgU, wasm.OpcodeAtomicI64Rmw32CmpxchgU: - return signature_I32I64I64_I64, nil - default: - return nil, fmt.Errorf("unsupported atomic instruction in interpreterir: %s", wasm.AtomicInstructionName(atomicOp)) - } - default: - return nil, fmt.Errorf("unsupported instruction in interpreterir: 0x%x", op) - } -} - -// funcTypeToIRSignatures is the central cache for a module to get the *signature -// for function calls. -type funcTypeToIRSignatures struct { - directCalls []*signature - indirectCalls []*signature - wasmTypes []wasm.FunctionType -} - -// get returns the *signature for the direct or indirect function call against functions whose type is at `typeIndex`. -func (f *funcTypeToIRSignatures) get(typeIndex wasm.Index, indirect bool) *signature { - var sig *signature - if indirect { - sig = f.indirectCalls[typeIndex] - } else { - sig = f.directCalls[typeIndex] - } - if sig != nil { - return sig - } - - tp := &f.wasmTypes[typeIndex] - if indirect { - sig = &signature{ - in: make([]unsignedType, 0, len(tp.Params)+1), // +1 to reserve space for call indirect index. - out: make([]unsignedType, 0, len(tp.Results)), - } - } else { - sig = &signature{ - in: make([]unsignedType, 0, len(tp.Params)), - out: make([]unsignedType, 0, len(tp.Results)), - } - } - - for _, vt := range tp.Params { - sig.in = append(sig.in, wasmValueTypeTounsignedType(vt)) - } - for _, vt := range tp.Results { - sig.out = append(sig.out, wasmValueTypeTounsignedType(vt)) - } - - if indirect { - sig.in = append(sig.in, unsignedTypeI32) - f.indirectCalls[typeIndex] = sig - } else { - f.directCalls[typeIndex] = sig - } - return sig -} - -func wasmValueTypeTounsignedType(vt wasm.ValueType) unsignedType { - switch vt { - case wasm.ValueTypeI32: - return unsignedTypeI32 - case wasm.ValueTypeI64, - // From interpreterir layer, ref type values are opaque 64-bit pointers. - wasm.ValueTypeExternref, wasm.ValueTypeFuncref: - return unsignedTypeI64 - case wasm.ValueTypeF32: - return unsignedTypeF32 - case wasm.ValueTypeF64: - return unsignedTypeF64 - case wasm.ValueTypeV128: - return unsignedTypeV128 - } - panic("unreachable") -} - -func wasmValueTypeToUnsignedOutSignature(vt wasm.ValueType) *signature { - switch vt { - case wasm.ValueTypeI32: - return signature_None_I32 - case wasm.ValueTypeI64, - // From interpreterir layer, ref type values are opaque 64-bit pointers. 
- wasm.ValueTypeExternref, wasm.ValueTypeFuncref: - return signature_None_I64 - case wasm.ValueTypeF32: - return signature_None_F32 - case wasm.ValueTypeF64: - return signature_None_F64 - case wasm.ValueTypeV128: - return signature_None_V128 - } - panic("unreachable") -} - -func wasmValueTypeToUnsignedInSignature(vt wasm.ValueType) *signature { - switch vt { - case wasm.ValueTypeI32: - return signature_I32_None - case wasm.ValueTypeI64, - // From interpreterir layer, ref type values are opaque 64-bit pointers. - wasm.ValueTypeExternref, wasm.ValueTypeFuncref: - return signature_I64_None - case wasm.ValueTypeF32: - return signature_F32_None - case wasm.ValueTypeF64: - return signature_F64_None - case wasm.ValueTypeV128: - return signature_V128_None - } - panic("unreachable") -} - -func wasmValueTypeToUnsignedInOutSignature(vt wasm.ValueType) *signature { - switch vt { - case wasm.ValueTypeI32: - return signature_I32_I32 - case wasm.ValueTypeI64, - // At interpreterir layer, ref type values are opaque 64-bit pointers. - wasm.ValueTypeExternref, wasm.ValueTypeFuncref: - return signature_I64_I64 - case wasm.ValueTypeF32: - return signature_F32_F32 - case wasm.ValueTypeF64: - return signature_F64_F64 - case wasm.ValueTypeV128: - return signature_V128_V128 - } - panic("unreachable") -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/abi.go deleted file mode 100644 index cf91c6b7a..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/abi.go +++ /dev/null @@ -1,170 +0,0 @@ -package backend - -import ( - "fmt" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -type ( - // FunctionABI represents the ABI information for a function which corresponds to a ssa.Signature. - FunctionABI struct { - Initialized bool - - Args, Rets []ABIArg - ArgStackSize, RetStackSize int64 - - ArgIntRealRegs byte - ArgFloatRealRegs byte - RetIntRealRegs byte - RetFloatRealRegs byte - } - - // ABIArg represents either argument or return value's location. - ABIArg struct { - // Index is the index of the argument. - Index int - // Kind is the kind of the argument. - Kind ABIArgKind - // Reg is valid if Kind == ABIArgKindReg. - // This VReg must be based on RealReg. - Reg regalloc.VReg - // Offset is valid if Kind == ABIArgKindStack. - // This is the offset from the beginning of either arg or ret stack slot. - Offset int64 - // Type is the type of the argument. - Type ssa.Type - } - - // ABIArgKind is the kind of ABI argument. - ABIArgKind byte -) - -const ( - // ABIArgKindReg represents an argument passed in a register. - ABIArgKindReg = iota - // ABIArgKindStack represents an argument passed in the stack. - ABIArgKindStack -) - -// String implements fmt.Stringer. -func (a *ABIArg) String() string { - return fmt.Sprintf("args[%d]: %s", a.Index, a.Kind) -} - -// String implements fmt.Stringer. -func (a ABIArgKind) String() string { - switch a { - case ABIArgKindReg: - return "reg" - case ABIArgKindStack: - return "stack" - default: - panic("BUG") - } -} - -// Init initializes the abiImpl for the given signature. 
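Init and its helper setABIArgs, which follow, walk the parameter and result types and hand each value either a real register or a growing stack offset. A rough stand-alone illustration of that register-then-stack scheme (all names made up; strings stand in for registers and ssa types):

package main

import "fmt"

type loc struct {
	reg    string // non-empty when the value is passed in a register
	offset int64  // stack offset, valid only when reg is empty
}

func assign(kinds []string, intRegs, floatRegs []string) (locs []loc, stackSize int64) {
	var nextInt, nextFloat int
	for _, k := range kinds {
		switch k {
		case "i32", "i64":
			if nextInt < len(intRegs) {
				locs = append(locs, loc{reg: intRegs[nextInt]})
				nextInt++
				continue
			}
		case "f32", "f64", "v128":
			if nextFloat < len(floatRegs) {
				locs = append(locs, loc{reg: floatRegs[nextFloat]})
				nextFloat++
				continue
			}
		}
		slot := int64(8) // every stack slot is at least 8 bytes
		if k == "v128" {
			slot = 16 // vectors take a 16-byte slot
		}
		locs = append(locs, loc{offset: stackSize})
		stackSize += slot
	}
	return locs, stackSize
}

func main() {
	locs, size := assign(
		[]string{"i64", "f64", "i32", "i32"},
		[]string{"rax", "rbx"}, // pretend only two integer registers are available
		[]string{"xmm0"},
	)
	fmt.Println(locs, size) // the last i32 spills to stack offset 0; total stack size 8
}

Exhausting a register pool is what produces the 8-byte (or 16-byte, for v128) stack slots that ArgStackSize and RetStackSize account for.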
-func (a *FunctionABI) Init(sig *ssa.Signature, argResultInts, argResultFloats []regalloc.RealReg) { - if len(a.Rets) < len(sig.Results) { - a.Rets = make([]ABIArg, len(sig.Results)) - } - a.Rets = a.Rets[:len(sig.Results)] - a.RetStackSize = a.setABIArgs(a.Rets, sig.Results, argResultInts, argResultFloats) - if argsNum := len(sig.Params); len(a.Args) < argsNum { - a.Args = make([]ABIArg, argsNum) - } - a.Args = a.Args[:len(sig.Params)] - a.ArgStackSize = a.setABIArgs(a.Args, sig.Params, argResultInts, argResultFloats) - - // Gather the real registers usages in arg/return. - a.ArgIntRealRegs, a.ArgFloatRealRegs = 0, 0 - a.RetIntRealRegs, a.RetFloatRealRegs = 0, 0 - for i := range a.Rets { - r := &a.Rets[i] - if r.Kind == ABIArgKindReg { - if r.Type.IsInt() { - a.RetIntRealRegs++ - } else { - a.RetFloatRealRegs++ - } - } - } - for i := range a.Args { - arg := &a.Args[i] - if arg.Kind == ABIArgKindReg { - if arg.Type.IsInt() { - a.ArgIntRealRegs++ - } else { - a.ArgFloatRealRegs++ - } - } - } - - a.Initialized = true -} - -// setABIArgs sets the ABI arguments in the given slice. This assumes that len(s) >= len(types) -// where if len(s) > len(types), the last elements of s is for the multi-return slot. -func (a *FunctionABI) setABIArgs(s []ABIArg, types []ssa.Type, ints, floats []regalloc.RealReg) (stackSize int64) { - il, fl := len(ints), len(floats) - - var stackOffset int64 - intParamIndex, floatParamIndex := 0, 0 - for i, typ := range types { - arg := &s[i] - arg.Index = i - arg.Type = typ - if typ.IsInt() { - if intParamIndex >= il { - arg.Kind = ABIArgKindStack - const slotSize = 8 // Align 8 bytes. - arg.Offset = stackOffset - stackOffset += slotSize - } else { - arg.Kind = ABIArgKindReg - arg.Reg = regalloc.FromRealReg(ints[intParamIndex], regalloc.RegTypeInt) - intParamIndex++ - } - } else { - if floatParamIndex >= fl { - arg.Kind = ABIArgKindStack - slotSize := int64(8) // Align at least 8 bytes. - if typ.Bits() == 128 { // Vector. - slotSize = 16 - } - arg.Offset = stackOffset - stackOffset += slotSize - } else { - arg.Kind = ABIArgKindReg - arg.Reg = regalloc.FromRealReg(floats[floatParamIndex], regalloc.RegTypeFloat) - floatParamIndex++ - } - } - } - return stackOffset -} - -func (a *FunctionABI) AlignedArgResultStackSlotSize() uint32 { - stackSlotSize := a.RetStackSize + a.ArgStackSize - // Align stackSlotSize to 16 bytes. - stackSlotSize = (stackSlotSize + 15) &^ 15 - // Check overflow 32-bit. - if stackSlotSize > 0xFFFFFFFF { - panic("ABI stack slot size overflow") - } - return uint32(stackSlotSize) -} - -func (a *FunctionABI) ABIInfoAsUint64() uint64 { - return uint64(a.ArgIntRealRegs)<<56 | - uint64(a.ArgFloatRealRegs)<<48 | - uint64(a.RetIntRealRegs)<<40 | - uint64(a.RetFloatRealRegs)<<32 | - uint64(a.AlignedArgResultStackSlotSize()) -} - -func ABIInfoFromUint64(info uint64) (argIntRealRegs, argFloatRealRegs, retIntRealRegs, retFloatRealRegs byte, stackSlotSize uint32) { - return byte(info >> 56), byte(info >> 48), byte(info >> 40), byte(info >> 32), uint32(info) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/backend.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/backend.go deleted file mode 100644 index dd67da43e..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/backend.go +++ /dev/null @@ -1,3 +0,0 @@ -// Package backend must be free of Wasm-specific concept. In other words, -// this package must not import internal/wasm package. 
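A note on the FunctionABI helpers above: ABIInfoAsUint64 packs the four per-kind register counts and the aligned stack-slot size into a single word, and ABIInfoFromUint64 recovers them. A small stand-alone round trip of that byte layout (illustrative only, not wazero code):

package main

import "fmt"

// pack mirrors the layout: four one-byte register counts in the high four
// bytes, and the 32-bit aligned stack-slot size in the low four bytes.
func pack(argInt, argFloat, retInt, retFloat byte, stackSlotSize uint32) uint64 {
	return uint64(argInt)<<56 | uint64(argFloat)<<48 |
		uint64(retInt)<<40 | uint64(retFloat)<<32 | uint64(stackSlotSize)
}

func unpack(info uint64) (argInt, argFloat, retInt, retFloat byte, stackSlotSize uint32) {
	return byte(info >> 56), byte(info >> 48), byte(info >> 40), byte(info >> 32), uint32(info)
}

func main() {
	info := pack(2, 1, 1, 0, 32)
	fmt.Printf("%#x\n", info) // 0x201010000000020
	fmt.Println(unpack(info)) // 2 1 1 0 32
}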
-package backend diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go deleted file mode 100644 index 62d365015..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go +++ /dev/null @@ -1,399 +0,0 @@ -package backend - -import ( - "context" - "fmt" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -// NewCompiler returns a new Compiler that can generate a machine code. -func NewCompiler(ctx context.Context, mach Machine, builder ssa.Builder) Compiler { - return newCompiler(ctx, mach, builder) -} - -func newCompiler(_ context.Context, mach Machine, builder ssa.Builder) *compiler { - argResultInts, argResultFloats := mach.ArgsResultsRegs() - c := &compiler{ - mach: mach, ssaBuilder: builder, - nextVRegID: regalloc.VRegIDNonReservedBegin, - argResultInts: argResultInts, - argResultFloats: argResultFloats, - } - mach.SetCompiler(c) - return c -} - -// Compiler is the backend of wazevo which takes ssa.Builder and Machine, -// use the information there to emit the final machine code. -type Compiler interface { - // SSABuilder returns the ssa.Builder used by this compiler. - SSABuilder() ssa.Builder - - // Compile executes the following steps: - // 1. Lower() - // 2. RegAlloc() - // 3. Finalize() - // 4. Encode() - // - // Each step can be called individually for testing purpose, therefore they are exposed in this interface too. - // - // The returned byte slices are the machine code and the relocation information for the machine code. - // The caller is responsible for copying them immediately since the compiler may reuse the buffer. - Compile(ctx context.Context) (_ []byte, _ []RelocationInfo, _ error) - - // Lower lowers the given ssa.Instruction to the machine-specific instructions. - Lower() - - // RegAlloc performs the register allocation after Lower is called. - RegAlloc() - - // Finalize performs the finalization of the compilation, including machine code emission. - // This must be called after RegAlloc. - Finalize(ctx context.Context) error - - // Buf returns the buffer of the encoded machine code. This is only used for testing purpose. - Buf() []byte - - BufPtr() *[]byte - - // Format returns the debug string of the current state of the compiler. - Format() string - - // Init initializes the internal state of the compiler for the next compilation. - Init() - - // AllocateVReg allocates a new virtual register of the given type. - AllocateVReg(typ ssa.Type) regalloc.VReg - - // ValueDefinition returns the definition of the given value. - ValueDefinition(ssa.Value) SSAValueDefinition - - // VRegOf returns the virtual register of the given ssa.Value. - VRegOf(value ssa.Value) regalloc.VReg - - // TypeOf returns the ssa.Type of the given virtual register. - TypeOf(regalloc.VReg) ssa.Type - - // MatchInstr returns true if the given definition is from an instruction with the given opcode, the current group ID, - // and a refcount of 1. That means, the instruction can be merged/swapped within the current instruction group. - MatchInstr(def SSAValueDefinition, opcode ssa.Opcode) bool - - // MatchInstrOneOf is the same as MatchInstr but for multiple opcodes. If it matches one of ssa.Opcode, - // this returns the opcode. Otherwise, this returns ssa.OpcodeInvalid. 
- // - // Note: caller should be careful to avoid excessive allocation on opcodes slice. - MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode - - // AddRelocationInfo appends the relocation information for the function reference at the current buffer offset. - AddRelocationInfo(funcRef ssa.FuncRef) - - // AddSourceOffsetInfo appends the source offset information for the given offset. - AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset) - - // SourceOffsetInfo returns the source offset information for the current buffer offset. - SourceOffsetInfo() []SourceOffsetInfo - - // EmitByte appends a byte to the buffer. Used during the code emission. - EmitByte(b byte) - - // Emit4Bytes appends 4 bytes to the buffer. Used during the code emission. - Emit4Bytes(b uint32) - - // Emit8Bytes appends 8 bytes to the buffer. Used during the code emission. - Emit8Bytes(b uint64) - - // GetFunctionABI returns the ABI information for the given signature. - GetFunctionABI(sig *ssa.Signature) *FunctionABI -} - -// RelocationInfo represents the relocation information for a call instruction. -type RelocationInfo struct { - // Offset represents the offset from the beginning of the machine code of either a function or the entire module. - Offset int64 - // Target is the target function of the call instruction. - FuncRef ssa.FuncRef -} - -// compiler implements Compiler. -type compiler struct { - mach Machine - currentGID ssa.InstructionGroupID - ssaBuilder ssa.Builder - // nextVRegID is the next virtual register ID to be allocated. - nextVRegID regalloc.VRegID - // ssaValueToVRegs maps ssa.ValueID to regalloc.VReg. - ssaValueToVRegs [] /* VRegID to */ regalloc.VReg - ssaValuesInfo []ssa.ValueInfo - // returnVRegs is the list of virtual registers that store the return values. - returnVRegs []regalloc.VReg - varEdges [][2]regalloc.VReg - varEdgeTypes []ssa.Type - constEdges []struct { - cInst *ssa.Instruction - dst regalloc.VReg - } - vRegSet []bool - vRegIDs []regalloc.VRegID - tempRegs []regalloc.VReg - tmpVals []ssa.Value - ssaTypeOfVRegID [] /* VRegID to */ ssa.Type - buf []byte - relocations []RelocationInfo - sourceOffsets []SourceOffsetInfo - // abis maps ssa.SignatureID to the ABI implementation. - abis []FunctionABI - argResultInts, argResultFloats []regalloc.RealReg -} - -// SourceOffsetInfo is a data to associate the source offset with the executable offset. -type SourceOffsetInfo struct { - // SourceOffset is the source offset in the original source code. - SourceOffset ssa.SourceOffset - // ExecutableOffset is the offset in the compiled executable. - ExecutableOffset int64 -} - -// Compile implements Compiler.Compile. 
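Compile, defined next, drives the whole pipeline (Lower, then RegAlloc, then Finalize and Encode) and, as the interface comment above warns, returns slices that the compiler may reuse for the next function. Below is a loose caller-side sketch of honoring that contract. mach and builder stand for a concrete backend.Machine and an already-populated ssa.Builder obtained elsewhere, these packages are internal so the snippet only compiles from inside the wazero module, and the real engine wires these steps up differently:

package example

import (
	"context"

	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

// compileOne compiles whatever function is currently built in builder and
// returns copies of the machine code and relocations, since the slices
// returned by Compile may be overwritten by the next compilation.
func compileOne(ctx context.Context, mach backend.Machine, builder ssa.Builder) ([]byte, []backend.RelocationInfo, error) {
	c := backend.NewCompiler(ctx, mach, builder)
	c.Init() // reset per-function state before compiling
	buf, relocs, err := c.Compile(ctx)
	if err != nil {
		return nil, nil, err
	}
	code := append([]byte(nil), buf...) // copy before the buffer is reused
	rels := append([]backend.RelocationInfo(nil), relocs...)
	return code, rels, nil
}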
-func (c *compiler) Compile(ctx context.Context) ([]byte, []RelocationInfo, error) { - c.Lower() - if wazevoapi.PrintSSAToBackendIRLowering && wazevoapi.PrintEnabledIndex(ctx) { - fmt.Printf("[[[after lowering for %s ]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format()) - } - if wazevoapi.DeterministicCompilationVerifierEnabled { - wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After lowering to ISA specific IR", c.Format()) - } - c.RegAlloc() - if wazevoapi.PrintRegisterAllocated && wazevoapi.PrintEnabledIndex(ctx) { - fmt.Printf("[[[after regalloc for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format()) - } - if wazevoapi.DeterministicCompilationVerifierEnabled { - wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Register Allocation", c.Format()) - } - if err := c.Finalize(ctx); err != nil { - return nil, nil, err - } - if wazevoapi.PrintFinalizedMachineCode && wazevoapi.PrintEnabledIndex(ctx) { - fmt.Printf("[[[after finalize for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format()) - } - if wazevoapi.DeterministicCompilationVerifierEnabled { - wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Finalization", c.Format()) - } - return c.buf, c.relocations, nil -} - -// RegAlloc implements Compiler.RegAlloc. -func (c *compiler) RegAlloc() { - c.mach.RegAlloc() -} - -// Finalize implements Compiler.Finalize. -func (c *compiler) Finalize(ctx context.Context) error { - c.mach.PostRegAlloc() - return c.mach.Encode(ctx) -} - -// setCurrentGroupID sets the current instruction group ID. -func (c *compiler) setCurrentGroupID(gid ssa.InstructionGroupID) { - c.currentGID = gid -} - -// assignVirtualRegisters assigns a virtual register to each ssa.ValueID Valid in the ssa.Builder. -func (c *compiler) assignVirtualRegisters() { - builder := c.ssaBuilder - c.ssaValuesInfo = builder.ValuesInfo() - - if diff := len(c.ssaValuesInfo) - len(c.ssaValueToVRegs); diff > 0 { - c.ssaValueToVRegs = append(c.ssaValueToVRegs, make([]regalloc.VReg, diff+1)...) - } - - for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() { - // First we assign a virtual register to each parameter. - for i := 0; i < blk.Params(); i++ { - p := blk.Param(i) - pid := p.ID() - typ := p.Type() - vreg := c.AllocateVReg(typ) - c.ssaValueToVRegs[pid] = vreg - c.ssaTypeOfVRegID[vreg.ID()] = p.Type() - } - - // Assigns each value to a virtual register produced by instructions. - for cur := blk.Root(); cur != nil; cur = cur.Next() { - r, rs := cur.Returns() - if r.Valid() { - id := r.ID() - ssaTyp := r.Type() - typ := r.Type() - vReg := c.AllocateVReg(typ) - c.ssaValueToVRegs[id] = vReg - c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp - } - for _, r := range rs { - id := r.ID() - ssaTyp := r.Type() - vReg := c.AllocateVReg(ssaTyp) - c.ssaValueToVRegs[id] = vReg - c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp - } - } - } - - for i, retBlk := 0, builder.ReturnBlock(); i < retBlk.Params(); i++ { - typ := retBlk.Param(i).Type() - vReg := c.AllocateVReg(typ) - c.returnVRegs = append(c.returnVRegs, vReg) - c.ssaTypeOfVRegID[vReg.ID()] = typ - } -} - -// AllocateVReg implements Compiler.AllocateVReg. -func (c *compiler) AllocateVReg(typ ssa.Type) regalloc.VReg { - regType := regalloc.RegTypeOf(typ) - r := regalloc.VReg(c.nextVRegID).SetRegType(regType) - - id := r.ID() - if int(id) >= len(c.ssaTypeOfVRegID) { - c.ssaTypeOfVRegID = append(c.ssaTypeOfVRegID, make([]ssa.Type, id+1)...) 
- } - c.ssaTypeOfVRegID[id] = typ - c.nextVRegID++ - return r -} - -// Init implements Compiler.Init. -func (c *compiler) Init() { - c.currentGID = 0 - c.nextVRegID = regalloc.VRegIDNonReservedBegin - c.returnVRegs = c.returnVRegs[:0] - c.mach.Reset() - c.varEdges = c.varEdges[:0] - c.constEdges = c.constEdges[:0] - c.buf = c.buf[:0] - c.sourceOffsets = c.sourceOffsets[:0] - c.relocations = c.relocations[:0] -} - -// ValueDefinition implements Compiler.ValueDefinition. -func (c *compiler) ValueDefinition(value ssa.Value) SSAValueDefinition { - return SSAValueDefinition{ - V: value, - Instr: c.ssaBuilder.InstructionOfValue(value), - RefCount: c.ssaValuesInfo[value.ID()].RefCount, - } -} - -// VRegOf implements Compiler.VRegOf. -func (c *compiler) VRegOf(value ssa.Value) regalloc.VReg { - return c.ssaValueToVRegs[value.ID()] -} - -// Format implements Compiler.Format. -func (c *compiler) Format() string { - return c.mach.Format() -} - -// TypeOf implements Compiler.Format. -func (c *compiler) TypeOf(v regalloc.VReg) ssa.Type { - return c.ssaTypeOfVRegID[v.ID()] -} - -// MatchInstr implements Compiler.MatchInstr. -func (c *compiler) MatchInstr(def SSAValueDefinition, opcode ssa.Opcode) bool { - instr := def.Instr - return def.IsFromInstr() && - instr.Opcode() == opcode && - instr.GroupID() == c.currentGID && - def.RefCount < 2 -} - -// MatchInstrOneOf implements Compiler.MatchInstrOneOf. -func (c *compiler) MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode { - instr := def.Instr - if !def.IsFromInstr() { - return ssa.OpcodeInvalid - } - - if instr.GroupID() != c.currentGID { - return ssa.OpcodeInvalid - } - - if def.RefCount >= 2 { - return ssa.OpcodeInvalid - } - - opcode := instr.Opcode() - for _, op := range opcodes { - if opcode == op { - return opcode - } - } - return ssa.OpcodeInvalid -} - -// SSABuilder implements Compiler .SSABuilder. -func (c *compiler) SSABuilder() ssa.Builder { - return c.ssaBuilder -} - -// AddSourceOffsetInfo implements Compiler.AddSourceOffsetInfo. -func (c *compiler) AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset) { - c.sourceOffsets = append(c.sourceOffsets, SourceOffsetInfo{ - SourceOffset: sourceOffset, - ExecutableOffset: executableOffset, - }) -} - -// SourceOffsetInfo implements Compiler.SourceOffsetInfo. -func (c *compiler) SourceOffsetInfo() []SourceOffsetInfo { - return c.sourceOffsets -} - -// AddRelocationInfo implements Compiler.AddRelocationInfo. -func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef) { - c.relocations = append(c.relocations, RelocationInfo{ - Offset: int64(len(c.buf)), - FuncRef: funcRef, - }) -} - -// Emit8Bytes implements Compiler.Emit8Bytes. -func (c *compiler) Emit8Bytes(b uint64) { - c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24), byte(b>>32), byte(b>>40), byte(b>>48), byte(b>>56)) -} - -// Emit4Bytes implements Compiler.Emit4Bytes. -func (c *compiler) Emit4Bytes(b uint32) { - c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24)) -} - -// EmitByte implements Compiler.EmitByte. -func (c *compiler) EmitByte(b byte) { - c.buf = append(c.buf, b) -} - -// Buf implements Compiler.Buf. -func (c *compiler) Buf() []byte { - return c.buf -} - -// BufPtr implements Compiler.BufPtr. -func (c *compiler) BufPtr() *[]byte { - return &c.buf -} - -func (c *compiler) GetFunctionABI(sig *ssa.Signature) *FunctionABI { - if int(sig.ID) >= len(c.abis) { - c.abis = append(c.abis, make([]FunctionABI, int(sig.ID)+1)...) 
- } - - abi := &c.abis[sig.ID] - if abi.Initialized { - return abi - } - - abi.Init(sig, c.argResultInts, c.argResultFloats) - return abi -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler_lower.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler_lower.go deleted file mode 100644 index 735cfa3d3..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler_lower.go +++ /dev/null @@ -1,226 +0,0 @@ -package backend - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -// Lower implements Compiler.Lower. -func (c *compiler) Lower() { - c.assignVirtualRegisters() - c.mach.SetCurrentABI(c.GetFunctionABI(c.ssaBuilder.Signature())) - c.mach.StartLoweringFunction(c.ssaBuilder.BlockIDMax()) - c.lowerBlocks() -} - -// lowerBlocks lowers each block in the ssa.Builder. -func (c *compiler) lowerBlocks() { - builder := c.ssaBuilder - for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() { - c.lowerBlock(blk) - } - - // After lowering all blocks, we need to link adjacent blocks to layout one single instruction list. - var prev ssa.BasicBlock - for next := builder.BlockIteratorReversePostOrderBegin(); next != nil; next = builder.BlockIteratorReversePostOrderNext() { - if prev != nil { - c.mach.LinkAdjacentBlocks(prev, next) - } - prev = next - } -} - -func (c *compiler) lowerBlock(blk ssa.BasicBlock) { - mach := c.mach - mach.StartBlock(blk) - - // We traverse the instructions in reverse order because we might want to lower multiple - // instructions together. - cur := blk.Tail() - - // First gather the branching instructions at the end of the blocks. - var br0, br1 *ssa.Instruction - if cur.IsBranching() { - br0 = cur - cur = cur.Prev() - if cur != nil && cur.IsBranching() { - br1 = cur - cur = cur.Prev() - } - } - - if br0 != nil { - c.lowerBranches(br0, br1) - } - - if br1 != nil && br0 == nil { - panic("BUG? when a block has conditional branch but doesn't end with an unconditional branch?") - } - - // Now start lowering the non-branching instructions. - for ; cur != nil; cur = cur.Prev() { - c.setCurrentGroupID(cur.GroupID()) - if cur.Lowered() { - continue - } - - switch cur.Opcode() { - case ssa.OpcodeReturn: - rets := cur.ReturnVals() - if len(rets) > 0 { - c.mach.LowerReturns(rets) - } - c.mach.InsertReturn() - default: - mach.LowerInstr(cur) - } - mach.FlushPendingInstructions() - } - - // Finally, if this is the entry block, we have to insert copies of arguments from the real location to the VReg. - if blk.EntryBlock() { - c.lowerFunctionArguments(blk) - } - - mach.EndBlock() -} - -// lowerBranches is called right after StartBlock and before any LowerInstr call if -// there are branches to the given block. br0 is the very end of the block and b1 is the before the br0 if it exists. -// At least br0 is not nil, but br1 can be nil if there's no branching before br0. -// -// See ssa.Instruction IsBranching, and the comment on ssa.BasicBlock. 
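lowerBlockArguments, further below, has to move branch arguments into the successor's parameter registers without clobbering a source that is still pending; when any destination is also a source, it routes every move through a fresh temporary. A stand-alone sketch of that overlap check and fallback (plain strings as registers, all names made up):

package main

import "fmt"

type move struct{ src, dst string }

// emitMoves returns the move sequence to perform: direct moves when no
// destination overlaps a source, otherwise copies through temporaries so
// that no source is overwritten before it has been read.
func emitMoves(moves []move, newTemp func() string) (out []move) {
	srcs := map[string]bool{}
	for _, m := range moves {
		srcs[m.src] = true
	}
	overlap := false
	for _, m := range moves {
		if srcs[m.dst] {
			overlap = true
			break
		}
	}
	if !overlap {
		return moves
	}
	temps := make([]string, len(moves))
	for i, m := range moves {
		temps[i] = newTemp()
		out = append(out, move{src: m.src, dst: temps[i]})
	}
	for i, m := range moves {
		out = append(out, move{src: temps[i], dst: m.dst})
	}
	return out
}

func main() {
	n := 0
	newTemp := func() string { n++; return fmt.Sprintf("t%d", n) }
	// r1->r2 and r2->r1 overlap, so both moves go through temporaries.
	fmt.Println(emitMoves([]move{{"r1", "r2"}, {"r2", "r1"}}, newTemp))
}

When the edges do not overlap, the direct moves suffice, which is the common case the deleted code also fast-paths.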
-func (c *compiler) lowerBranches(br0, br1 *ssa.Instruction) { - mach := c.mach - - c.setCurrentGroupID(br0.GroupID()) - c.mach.LowerSingleBranch(br0) - mach.FlushPendingInstructions() - if br1 != nil { - c.setCurrentGroupID(br1.GroupID()) - c.mach.LowerConditionalBranch(br1) - mach.FlushPendingInstructions() - } - - if br0.Opcode() == ssa.OpcodeJump { - _, args, targetBlockID := br0.BranchData() - argExists := len(args) != 0 - if argExists && br1 != nil { - panic("BUG: critical edge split failed") - } - target := c.ssaBuilder.BasicBlock(targetBlockID) - if argExists && target.ReturnBlock() { - if len(args) > 0 { - c.mach.LowerReturns(args) - } - } else if argExists { - c.lowerBlockArguments(args, target) - } - } - mach.FlushPendingInstructions() -} - -func (c *compiler) lowerFunctionArguments(entry ssa.BasicBlock) { - mach := c.mach - - c.tmpVals = c.tmpVals[:0] - data := c.ssaBuilder.ValuesInfo() - for i := 0; i < entry.Params(); i++ { - p := entry.Param(i) - if data[p.ID()].RefCount > 0 { - c.tmpVals = append(c.tmpVals, p) - } else { - // If the argument is not used, we can just pass an invalid value. - c.tmpVals = append(c.tmpVals, ssa.ValueInvalid) - } - } - mach.LowerParams(c.tmpVals) - mach.FlushPendingInstructions() -} - -// lowerBlockArguments lowers how to pass arguments to the given successor block. -func (c *compiler) lowerBlockArguments(args []ssa.Value, succ ssa.BasicBlock) { - if len(args) != succ.Params() { - panic("BUG: mismatched number of arguments") - } - - c.varEdges = c.varEdges[:0] - c.varEdgeTypes = c.varEdgeTypes[:0] - c.constEdges = c.constEdges[:0] - for i := 0; i < len(args); i++ { - dst := succ.Param(i) - src := args[i] - - dstReg := c.VRegOf(dst) - srcInstr := c.ssaBuilder.InstructionOfValue(src) - if srcInstr != nil && srcInstr.Constant() { - c.constEdges = append(c.constEdges, struct { - cInst *ssa.Instruction - dst regalloc.VReg - }{cInst: srcInstr, dst: dstReg}) - } else { - srcReg := c.VRegOf(src) - // Even when the src=dst, insert the move so that we can keep such registers keep-alive. - c.varEdges = append(c.varEdges, [2]regalloc.VReg{srcReg, dstReg}) - c.varEdgeTypes = append(c.varEdgeTypes, src.Type()) - } - } - - // Check if there's an overlap among the dsts and srcs in varEdges. - c.vRegIDs = c.vRegIDs[:0] - for _, edge := range c.varEdges { - src := edge[0].ID() - if int(src) >= len(c.vRegSet) { - c.vRegSet = append(c.vRegSet, make([]bool, src+1)...) - } - c.vRegSet[src] = true - c.vRegIDs = append(c.vRegIDs, src) - } - separated := true - for _, edge := range c.varEdges { - dst := edge[1].ID() - if int(dst) >= len(c.vRegSet) { - c.vRegSet = append(c.vRegSet, make([]bool, dst+1)...) - } else { - if c.vRegSet[dst] { - separated = false - break - } - } - } - for _, id := range c.vRegIDs { - c.vRegSet[id] = false // reset for the next use. - } - - if separated { - // If there's no overlap, we can simply move the source to destination. - for i, edge := range c.varEdges { - src, dst := edge[0], edge[1] - c.mach.InsertMove(dst, src, c.varEdgeTypes[i]) - } - } else { - // Otherwise, we allocate a temporary registers and move the source to the temporary register, - // - // First move all of them to temporary registers. - c.tempRegs = c.tempRegs[:0] - for i, edge := range c.varEdges { - src := edge[0] - typ := c.varEdgeTypes[i] - temp := c.AllocateVReg(typ) - c.tempRegs = append(c.tempRegs, temp) - c.mach.InsertMove(temp, src, typ) - } - // Then move the temporary registers to the destination. 
- for i, edge := range c.varEdges { - temp := c.tempRegs[i] - dst := edge[1] - c.mach.InsertMove(dst, temp, c.varEdgeTypes[i]) - } - } - - // Finally, move the constants. - for _, edge := range c.constEdges { - cInst, dst := edge.cInst, edge.dst - c.mach.InsertLoadConstantBlockArg(cInst, dst) - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/go_call.go deleted file mode 100644 index 6fe6d7b3c..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/go_call.go +++ /dev/null @@ -1,33 +0,0 @@ -package backend - -import "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - -// GoFunctionCallRequiredStackSize returns the size of the stack required for the Go function call. -// argBegin is the index of the first argument in the signature which is not either execution context or module context. -func GoFunctionCallRequiredStackSize(sig *ssa.Signature, argBegin int) (ret, retUnaligned int64) { - var paramNeededInBytes, resultNeededInBytes int64 - for _, p := range sig.Params[argBegin:] { - s := int64(p.Size()) - if s < 8 { - s = 8 // We use uint64 for all basic types, except SIMD v128. - } - paramNeededInBytes += s - } - for _, r := range sig.Results { - s := int64(r.Size()) - if s < 8 { - s = 8 // We use uint64 for all basic types, except SIMD v128. - } - resultNeededInBytes += s - } - - if paramNeededInBytes > resultNeededInBytes { - ret = paramNeededInBytes - } else { - ret = resultNeededInBytes - } - retUnaligned = ret - // Align to 16 bytes. - ret = (ret + 15) &^ 15 - return -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi.go deleted file mode 100644 index 130f8c621..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi.go +++ /dev/null @@ -1,186 +0,0 @@ -package amd64 - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -// For the details of the ABI, see: -// https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#amd64-architecture - -var ( - intArgResultRegs = []regalloc.RealReg{rax, rbx, rcx, rdi, rsi, r8, r9, r10, r11} - floatArgResultRegs = []regalloc.RealReg{xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7} -) - -var regInfo = ®alloc.RegisterInfo{ - AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{ - regalloc.RegTypeInt: { - rax, rcx, rdx, rbx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15, - }, - regalloc.RegTypeFloat: { - xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, - }, - }, - CalleeSavedRegisters: regalloc.NewRegSet( - rdx, r12, r13, r14, r15, - xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, - ), - CallerSavedRegisters: regalloc.NewRegSet( - rax, rcx, rbx, rsi, rdi, r8, r9, r10, r11, - xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - ), - RealRegToVReg: []regalloc.VReg{ - rax: raxVReg, rcx: rcxVReg, rdx: rdxVReg, rbx: rbxVReg, rsp: rspVReg, rbp: rbpVReg, rsi: rsiVReg, rdi: rdiVReg, - r8: r8VReg, r9: r9VReg, r10: r10VReg, r11: r11VReg, r12: r12VReg, r13: r13VReg, r14: r14VReg, r15: r15VReg, - xmm0: xmm0VReg, xmm1: xmm1VReg, xmm2: xmm2VReg, xmm3: xmm3VReg, xmm4: 
xmm4VReg, xmm5: xmm5VReg, xmm6: xmm6VReg, - xmm7: xmm7VReg, xmm8: xmm8VReg, xmm9: xmm9VReg, xmm10: xmm10VReg, xmm11: xmm11VReg, xmm12: xmm12VReg, - xmm13: xmm13VReg, xmm14: xmm14VReg, xmm15: xmm15VReg, - }, - RealRegName: func(r regalloc.RealReg) string { return regNames[r] }, - RealRegType: func(r regalloc.RealReg) regalloc.RegType { - if r < xmm0 { - return regalloc.RegTypeInt - } - return regalloc.RegTypeFloat - }, -} - -// ArgsResultsRegs implements backend.Machine. -func (m *machine) ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) { - return intArgResultRegs, floatArgResultRegs -} - -// LowerParams implements backend.Machine. -func (m *machine) LowerParams(args []ssa.Value) { - a := m.currentABI - - for i, ssaArg := range args { - if !ssaArg.Valid() { - continue - } - reg := m.c.VRegOf(ssaArg) - arg := &a.Args[i] - if arg.Kind == backend.ABIArgKindReg { - m.InsertMove(reg, arg.Reg, arg.Type) - } else { - // - // (high address) - // +-----------------+ - // | ....... | - // | ret Y | - // | ....... | - // | ret 0 | - // | arg X | - // | ....... | - // | arg 1 | - // | arg 0 | - // | ReturnAddress | - // | Caller_RBP | - // +-----------------+ <-- RBP - // | ........... | - // | clobbered M | - // | ............ | - // | clobbered 0 | - // | spill slot N | - // | ........... | - // | spill slot 0 | - // RSP--> +-----------------+ - // (low address) - - // Load the value from the arg stack slot above the current RBP. - load := m.allocateInstr() - mem := newOperandMem(m.newAmodeImmRBPReg(uint32(arg.Offset + 16))) - switch arg.Type { - case ssa.TypeI32: - load.asMovzxRmR(extModeLQ, mem, reg) - case ssa.TypeI64: - load.asMov64MR(mem, reg) - case ssa.TypeF32: - load.asXmmUnaryRmR(sseOpcodeMovss, mem, reg) - case ssa.TypeF64: - load.asXmmUnaryRmR(sseOpcodeMovsd, mem, reg) - case ssa.TypeV128: - load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, reg) - default: - panic("BUG") - } - m.insert(load) - } - } -} - -// LowerReturns implements backend.Machine. -func (m *machine) LowerReturns(rets []ssa.Value) { - // Load the XMM registers first as it might need a temporary register to inline - // constant return. - a := m.currentABI - for i, ret := range rets { - r := &a.Rets[i] - if !r.Type.IsInt() { - m.LowerReturn(ret, r) - } - } - // Then load the GPR registers. - for i, ret := range rets { - r := &a.Rets[i] - if r.Type.IsInt() { - m.LowerReturn(ret, r) - } - } -} - -func (m *machine) LowerReturn(ret ssa.Value, r *backend.ABIArg) { - reg := m.c.VRegOf(ret) - if def := m.c.ValueDefinition(ret); def.IsFromInstr() { - // Constant instructions are inlined. - if inst := def.Instr; inst.Constant() { - m.insertLoadConstant(inst, reg) - } - } - if r.Kind == backend.ABIArgKindReg { - m.InsertMove(r.Reg, reg, ret.Type()) - } else { - // - // (high address) - // +-----------------+ - // | ....... | - // | ret Y | - // | ....... | - // | ret 0 | - // | arg X | - // | ....... | - // | arg 1 | - // | arg 0 | - // | ReturnAddress | - // | Caller_RBP | - // +-----------------+ <-- RBP - // | ........... | - // | clobbered M | - // | ............ | - // | clobbered 0 | - // | spill slot N | - // | ........... | - // | spill slot 0 | - // RSP--> +-----------------+ - // (low address) - - // Store the value to the return stack slot above the current RBP. 
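-	// Offset breakdown: +16 skips the saved Caller_RBP and the ReturnAddress slots,
-	// and ArgStackSize skips past the argument area, so r.Offset lands in the
-	// return-value region shown in the diagram above.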
- store := m.allocateInstr() - mem := newOperandMem(m.newAmodeImmRBPReg(uint32(m.currentABI.ArgStackSize + 16 + r.Offset))) - switch r.Type { - case ssa.TypeI32: - store.asMovRM(reg, mem, 4) - case ssa.TypeI64: - store.asMovRM(reg, mem, 8) - case ssa.TypeF32: - store.asXmmMovRM(sseOpcodeMovss, reg, mem) - case ssa.TypeF64: - store.asXmmMovRM(sseOpcodeMovsd, reg, mem) - case ssa.TypeV128: - store.asXmmMovRM(sseOpcodeMovdqu, reg, mem) - } - m.insert(store) - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_amd64.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_amd64.go deleted file mode 100644 index cbf1cfdc5..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_amd64.go +++ /dev/null @@ -1,9 +0,0 @@ -package amd64 - -// entrypoint enters the machine code generated by this backend which begins with the preamble generated by functionABI.EmitGoEntryPreamble below. -// This implements wazevo.entrypoint, and see the comments there for detail. -func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr) - -// afterGoFunctionCallEntrypoint enters the machine code after growing the stack. -// This implements wazevo.afterGoFunctionCallEntrypoint, and see the comments there for detail. -func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_amd64.s b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_amd64.s deleted file mode 100644 index e9cb131d1..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_amd64.s +++ /dev/null @@ -1,29 +0,0 @@ -#include "funcdata.h" -#include "textflag.h" - -// entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr -TEXT ·entrypoint(SB), NOSPLIT|NOFRAME, $0-48 - MOVQ preambleExecutable+0(FP), R11 - MOVQ functionExectuable+8(FP), R14 - MOVQ executionContextPtr+16(FP), AX // First argument is passed in AX. - MOVQ moduleContextPtr+24(FP), BX // Second argument is passed in BX. - MOVQ paramResultSlicePtr+32(FP), R12 - MOVQ goAllocatedStackSlicePtr+40(FP), R13 - JMP R11 - -// afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr) -TEXT ·afterGoFunctionCallEntrypoint(SB), NOSPLIT|NOFRAME, $0-32 - MOVQ executable+0(FP), CX - MOVQ executionContextPtr+8(FP), AX // First argument is passed in AX. - - // Save the stack pointer and frame pointer. - MOVQ BP, 16(AX) // 16 == ExecutionContextOffsetOriginalFramePointer - MOVQ SP, 24(AX) // 24 == ExecutionContextOffsetOriginalStackPointer - - // Then set the stack pointer and frame pointer to the values we got from the Go runtime. - MOVQ framePointer+24(FP), BP - - // WARNING: do not update SP before BP, because the Go translates (FP) as (SP) + 8. 
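-	// In other words, FP-relative operands are resolved against the incoming SP, so
-	// stackPointer+16(FP) below must be read while SP still holds its original value.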
- MOVQ stackPointer+16(FP), SP - - JMP CX diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_preamble.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_preamble.go deleted file mode 100644 index 882d06c06..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_preamble.go +++ /dev/null @@ -1,248 +0,0 @@ -package amd64 - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -var ( - executionContextPtrReg = raxVReg - - // Followings are callee saved registers. They can be used freely in the entry preamble - // since the preamble is called via Go assembly function which has stack-based ABI. - - // savedExecutionContextPtr also must be a callee-saved reg so that they can be used in the prologue and epilogue. - savedExecutionContextPtr = rdxVReg - // paramResultSlicePtr must match with entrypoint function in abi_entry_amd64.s. - paramResultSlicePtr = r12VReg - // goAllocatedStackPtr must match with entrypoint function in abi_entry_amd64.s. - goAllocatedStackPtr = r13VReg - // functionExecutable must match with entrypoint function in abi_entry_amd64.s. - functionExecutable = r14VReg - tmpIntReg = r15VReg - tmpXmmReg = xmm15VReg -) - -// CompileEntryPreamble implements backend.Machine. -func (m *machine) CompileEntryPreamble(sig *ssa.Signature) []byte { - root := m.compileEntryPreamble(sig) - m.encodeWithoutSSA(root) - buf := m.c.Buf() - return buf -} - -func (m *machine) compileEntryPreamble(sig *ssa.Signature) *instruction { - abi := backend.FunctionABI{} - abi.Init(sig, intArgResultRegs, floatArgResultRegs) - - root := m.allocateNop() - - //// ----------------------------------- prologue ----------------------------------- //// - - // First, we save executionContextPtrReg into a callee-saved register so that it can be used in epilogue as well. - // mov %executionContextPtrReg, %savedExecutionContextPtr - cur := m.move64(executionContextPtrReg, savedExecutionContextPtr, root) - - // Next is to save the original RBP and RSP into the execution context. - cur = m.saveOriginalRSPRBP(cur) - - // Now set the RSP to the Go-allocated stack pointer. - // mov %goAllocatedStackPtr, %rsp - cur = m.move64(goAllocatedStackPtr, rspVReg, cur) - - if stackSlotSize := abi.AlignedArgResultStackSlotSize(); stackSlotSize > 0 { - // Allocate stack slots for the arguments and return values. - // sub $stackSlotSize, %rsp - spDec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(uint32(stackSlotSize)), rspVReg, true) - cur = linkInstr(cur, spDec) - } - - var offset uint32 - for i := range abi.Args { - if i < 2 { - // module context ptr and execution context ptr are passed in rax and rbx by the Go assembly function. - continue - } - arg := &abi.Args[i] - cur = m.goEntryPreamblePassArg(cur, paramResultSlicePtr, offset, arg) - if arg.Type == ssa.TypeV128 { - offset += 16 - } else { - offset += 8 - } - } - - // Zero out RBP so that the unwind/stack growth code can correctly detect the end of the stack. - zerosRbp := m.allocateInstr().asAluRmiR(aluRmiROpcodeXor, newOperandReg(rbpVReg), rbpVReg, true) - cur = linkInstr(cur, zerosRbp) - - // Now ready to call the real function. 
Note that at this point stack pointer is already set to the Go-allocated, - // which is aligned to 16 bytes. - call := m.allocateInstr().asCallIndirect(newOperandReg(functionExecutable), &abi) - cur = linkInstr(cur, call) - - //// ----------------------------------- epilogue ----------------------------------- //// - - // Read the results from regs and the stack, and set them correctly into the paramResultSlicePtr. - offset = 0 - for i := range abi.Rets { - r := &abi.Rets[i] - cur = m.goEntryPreamblePassResult(cur, paramResultSlicePtr, offset, r, uint32(abi.ArgStackSize)) - if r.Type == ssa.TypeV128 { - offset += 16 - } else { - offset += 8 - } - } - - // Finally, restore the original RBP and RSP. - cur = m.restoreOriginalRSPRBP(cur) - - ret := m.allocateInstr().asRet() - linkInstr(cur, ret) - return root -} - -// saveOriginalRSPRBP saves the original RSP and RBP into the execution context. -func (m *machine) saveOriginalRSPRBP(cur *instruction) *instruction { - // mov %rbp, wazevoapi.ExecutionContextOffsetOriginalFramePointer(%executionContextPtrReg) - // mov %rsp, wazevoapi.ExecutionContextOffsetOriginalStackPointer(%executionContextPtrReg) - cur = m.loadOrStore64AtExecutionCtx(executionContextPtrReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, rbpVReg, true, cur) - cur = m.loadOrStore64AtExecutionCtx(executionContextPtrReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, rspVReg, true, cur) - return cur -} - -// restoreOriginalRSPRBP restores the original RSP and RBP from the execution context. -func (m *machine) restoreOriginalRSPRBP(cur *instruction) *instruction { - // mov wazevoapi.ExecutionContextOffsetOriginalFramePointer(%executionContextPtrReg), %rbp - // mov wazevoapi.ExecutionContextOffsetOriginalStackPointer(%executionContextPtrReg), %rsp - cur = m.loadOrStore64AtExecutionCtx(savedExecutionContextPtr, wazevoapi.ExecutionContextOffsetOriginalFramePointer, rbpVReg, false, cur) - cur = m.loadOrStore64AtExecutionCtx(savedExecutionContextPtr, wazevoapi.ExecutionContextOffsetOriginalStackPointer, rspVReg, false, cur) - return cur -} - -func (m *machine) move64(src, dst regalloc.VReg, prev *instruction) *instruction { - mov := m.allocateInstr().asMovRR(src, dst, true) - return linkInstr(prev, mov) -} - -func (m *machine) loadOrStore64AtExecutionCtx(execCtx regalloc.VReg, offset wazevoapi.Offset, r regalloc.VReg, store bool, prev *instruction) *instruction { - mem := newOperandMem(m.newAmodeImmReg(offset.U32(), execCtx)) - instr := m.allocateInstr() - if store { - instr.asMovRM(r, mem, 8) - } else { - instr.asMov64MR(mem, r) - } - return linkInstr(prev, instr) -} - -// This is for debugging. 
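-// Linking a ud2 after an instruction makes any unexpected fall-through fault
-// immediately with an illegal-instruction exception, which helps when bisecting
-// a miscompiled sequence.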
-func (m *machine) linkUD2(cur *instruction) *instruction { //nolint - return linkInstr(cur, m.allocateInstr().asUD2()) -} - -func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regalloc.VReg, offsetInParamSlice uint32, arg *backend.ABIArg) *instruction { - var dst regalloc.VReg - argTyp := arg.Type - if arg.Kind == backend.ABIArgKindStack { - // Caller saved registers ca - switch argTyp { - case ssa.TypeI32, ssa.TypeI64: - dst = tmpIntReg - case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - dst = tmpXmmReg - default: - panic("BUG") - } - } else { - dst = arg.Reg - } - - load := m.allocateInstr() - a := newOperandMem(m.newAmodeImmReg(offsetInParamSlice, paramSlicePtr)) - switch arg.Type { - case ssa.TypeI32: - load.asMovzxRmR(extModeLQ, a, dst) - case ssa.TypeI64: - load.asMov64MR(a, dst) - case ssa.TypeF32: - load.asXmmUnaryRmR(sseOpcodeMovss, a, dst) - case ssa.TypeF64: - load.asXmmUnaryRmR(sseOpcodeMovsd, a, dst) - case ssa.TypeV128: - load.asXmmUnaryRmR(sseOpcodeMovdqu, a, dst) - } - - cur = linkInstr(cur, load) - if arg.Kind == backend.ABIArgKindStack { - // Store back to the stack. - store := m.allocateInstr() - a := newOperandMem(m.newAmodeImmReg(uint32(arg.Offset), rspVReg)) - switch arg.Type { - case ssa.TypeI32: - store.asMovRM(dst, a, 4) - case ssa.TypeI64: - store.asMovRM(dst, a, 8) - case ssa.TypeF32: - store.asXmmMovRM(sseOpcodeMovss, dst, a) - case ssa.TypeF64: - store.asXmmMovRM(sseOpcodeMovsd, dst, a) - case ssa.TypeV128: - store.asXmmMovRM(sseOpcodeMovdqu, dst, a) - } - cur = linkInstr(cur, store) - } - return cur -} - -func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr regalloc.VReg, offsetInResultSlice uint32, result *backend.ABIArg, resultStackSlotBeginOffset uint32) *instruction { - var r regalloc.VReg - if result.Kind == backend.ABIArgKindStack { - // Load the value to the temporary. 
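-	// x86 has no memory-to-memory move, so results returned on the stack are staged
-	// through a scratch register (tmpIntReg or tmpXmmReg) before being written to
-	// the result slice.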
- load := m.allocateInstr() - offset := resultStackSlotBeginOffset + uint32(result.Offset) - a := newOperandMem(m.newAmodeImmReg(offset, rspVReg)) - switch result.Type { - case ssa.TypeI32: - r = tmpIntReg - load.asMovzxRmR(extModeLQ, a, r) - case ssa.TypeI64: - r = tmpIntReg - load.asMov64MR(a, r) - case ssa.TypeF32: - r = tmpXmmReg - load.asXmmUnaryRmR(sseOpcodeMovss, a, r) - case ssa.TypeF64: - r = tmpXmmReg - load.asXmmUnaryRmR(sseOpcodeMovsd, a, r) - case ssa.TypeV128: - r = tmpXmmReg - load.asXmmUnaryRmR(sseOpcodeMovdqu, a, r) - default: - panic("BUG") - } - cur = linkInstr(cur, load) - } else { - r = result.Reg - } - - store := m.allocateInstr() - a := newOperandMem(m.newAmodeImmReg(offsetInResultSlice, resultSlicePtr)) - switch result.Type { - case ssa.TypeI32: - store.asMovRM(r, a, 4) - case ssa.TypeI64: - store.asMovRM(r, a, 8) - case ssa.TypeF32: - store.asXmmMovRM(sseOpcodeMovss, r, a) - case ssa.TypeF64: - store.asXmmMovRM(sseOpcodeMovsd, r, a) - case ssa.TypeV128: - store.asXmmMovRM(sseOpcodeMovdqu, r, a) - } - - return linkInstr(cur, store) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go deleted file mode 100644 index 96f035e58..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go +++ /dev/null @@ -1,440 +0,0 @@ -package amd64 - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -var calleeSavedVRegs = []regalloc.VReg{ - rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg, - xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg, -} - -// CompileGoFunctionTrampoline implements backend.Machine. -func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte { - argBegin := 1 // Skips exec context by default. - if needModuleContextPtr { - argBegin++ - } - - abi := &backend.FunctionABI{} - abi.Init(sig, intArgResultRegs, floatArgResultRegs) - m.currentABI = abi - - cur := m.allocateNop() - m.rootInstr = cur - - // Execution context is always the first argument. - execCtrPtr := raxVReg - - // First we update RBP and RSP just like the normal prologue. - // - // (high address) (high address) - // RBP ----> +-----------------+ +-----------------+ - // | ....... | | ....... | - // | ret Y | | ret Y | - // | ....... | | ....... | - // | ret 0 | | ret 0 | - // | arg X | | arg X | - // | ....... | ====> | ....... | - // | arg 1 | | arg 1 | - // | arg 0 | | arg 0 | - // | Return Addr | | Return Addr | - // RSP ----> +-----------------+ | Caller_RBP | - // (low address) +-----------------+ <----- RSP, RBP - // - cur = m.setupRBPRSP(cur) - - goSliceSizeAligned, goSliceSizeAlignedUnaligned := backend.GoFunctionCallRequiredStackSize(sig, argBegin) - cur = m.insertStackBoundsCheck(goSliceSizeAligned+8 /* size of the Go slice */, cur) - - // Save the callee saved registers. - cur = m.saveRegistersInExecutionContext(cur, execCtrPtr, calleeSavedVRegs) - - if needModuleContextPtr { - moduleCtrPtr := rbxVReg // Module context is always the second argument. 
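-	// Store it into the execution context (GoFunctionCallCalleeModuleContextOpaque)
-	// so the Go-side handler can identify the callee's module context.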
- mem := m.newAmodeImmReg( - wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.U32(), - execCtrPtr) - store := m.allocateInstr().asMovRM(moduleCtrPtr, newOperandMem(mem), 8) - cur = linkInstr(cur, store) - } - - // Now let's advance the RSP to the stack slot for the arguments. - // - // (high address) (high address) - // +-----------------+ +-----------------+ - // | ....... | | ....... | - // | ret Y | | ret Y | - // | ....... | | ....... | - // | ret 0 | | ret 0 | - // | arg X | | arg X | - // | ....... | =======> | ....... | - // | arg 1 | | arg 1 | - // | arg 0 | | arg 0 | - // | Return Addr | | Return Addr | - // | Caller_RBP | | Caller_RBP | - // RBP,RSP --> +-----------------+ +-----------------+ <----- RBP - // (low address) | arg[N]/ret[M] | - // | .......... | - // | arg[1]/ret[1] | - // | arg[0]/ret[0] | - // +-----------------+ <----- RSP - // (low address) - // - // where the region of "arg[0]/ret[0] ... arg[N]/ret[M]" is the stack used by the Go functions, - // therefore will be accessed as the usual []uint64. So that's where we need to pass/receive - // the arguments/return values to/from Go function. - cur = m.addRSP(-int32(goSliceSizeAligned), cur) - - // Next, we need to store all the arguments to the stack in the typical Wasm stack style. - var offsetInGoSlice int32 - for i := range abi.Args[argBegin:] { - arg := &abi.Args[argBegin+i] - var v regalloc.VReg - if arg.Kind == backend.ABIArgKindReg { - v = arg.Reg - } else { - // We have saved callee saved registers, so we can use them. - if arg.Type.IsInt() { - v = r15VReg - } else { - v = xmm15VReg - } - mem := newOperandMem(m.newAmodeImmReg(uint32(arg.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg)) - load := m.allocateInstr() - switch arg.Type { - case ssa.TypeI32: - load.asMovzxRmR(extModeLQ, mem, v) - case ssa.TypeI64: - load.asMov64MR(mem, v) - case ssa.TypeF32: - load.asXmmUnaryRmR(sseOpcodeMovss, mem, v) - case ssa.TypeF64: - load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v) - case ssa.TypeV128: - load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v) - default: - panic("BUG") - } - cur = linkInstr(cur, load) - } - - store := m.allocateInstr() - mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg)) - switch arg.Type { - case ssa.TypeI32: - store.asMovRM(v, mem, 4) - offsetInGoSlice += 8 // always uint64 rep. - case ssa.TypeI64: - store.asMovRM(v, mem, 8) - offsetInGoSlice += 8 - case ssa.TypeF32: - store.asXmmMovRM(sseOpcodeMovss, v, mem) - offsetInGoSlice += 8 // always uint64 rep. - case ssa.TypeF64: - store.asXmmMovRM(sseOpcodeMovsd, v, mem) - offsetInGoSlice += 8 - case ssa.TypeV128: - store.asXmmMovRM(sseOpcodeMovdqu, v, mem) - offsetInGoSlice += 16 - default: - panic("BUG") - } - cur = linkInstr(cur, store) - } - - // Finally we push the size of the slice to the stack so the stack looks like: - // - // (high address) - // +-----------------+ - // | ....... | - // | ret Y | - // | ....... | - // | ret 0 | - // | arg X | - // | ....... | - // | arg 1 | - // | arg 0 | - // | Return Addr | - // | Caller_RBP | - // +-----------------+ <----- RBP - // | arg[N]/ret[M] | - // | .......... | - // | arg[1]/ret[1] | - // | arg[0]/ret[0] | - // | slice size | - // +-----------------+ <----- RSP - // (low address) - // - // push $sliceSize - cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandImm32(uint32(goSliceSizeAlignedUnaligned)))) - - // Load the exitCode to the register. - exitCodeReg := r12VReg // Callee saved which is already saved. 
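-	// The exit code is stored into the execution context below so that, once control
-	// returns to the Go world, the runtime can tell why the machine code exited.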
- cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(exitCode), false)) - - saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtrPtr, exitCodeReg) - cur = linkInstr(cur, setExitCode) - cur = linkInstr(cur, saveRsp) - cur = linkInstr(cur, saveRbp) - - // Ready to exit the execution. - cur = m.storeReturnAddressAndExit(cur, execCtrPtr) - - // We don't need the slice size anymore, so pop it. - cur = m.addRSP(8, cur) - - // Ready to set up the results. - offsetInGoSlice = 0 - // To avoid overwriting with the execution context pointer by the result, we need to track the offset, - // and defer the restoration of the result to the end of this function. - var argOverlapWithExecCtxOffset int32 = -1 - for i := range abi.Rets { - r := &abi.Rets[i] - var v regalloc.VReg - isRegResult := r.Kind == backend.ABIArgKindReg - if isRegResult { - v = r.Reg - if v.RealReg() == execCtrPtr.RealReg() { - argOverlapWithExecCtxOffset = offsetInGoSlice - offsetInGoSlice += 8 // always uint64 rep. - continue - } - } else { - if r.Type.IsInt() { - v = r15VReg - } else { - v = xmm15VReg - } - } - - load := m.allocateInstr() - mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg)) - switch r.Type { - case ssa.TypeI32: - load.asMovzxRmR(extModeLQ, mem, v) - offsetInGoSlice += 8 // always uint64 rep. - case ssa.TypeI64: - load.asMov64MR(mem, v) - offsetInGoSlice += 8 - case ssa.TypeF32: - load.asXmmUnaryRmR(sseOpcodeMovss, mem, v) - offsetInGoSlice += 8 // always uint64 rep. - case ssa.TypeF64: - load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v) - offsetInGoSlice += 8 - case ssa.TypeV128: - load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v) - offsetInGoSlice += 16 - default: - panic("BUG") - } - cur = linkInstr(cur, load) - - if !isRegResult { - // We need to store it back to the result slot above rbp. - store := m.allocateInstr() - mem := newOperandMem(m.newAmodeImmReg(uint32(abi.ArgStackSize+r.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg)) - switch r.Type { - case ssa.TypeI32: - store.asMovRM(v, mem, 4) - case ssa.TypeI64: - store.asMovRM(v, mem, 8) - case ssa.TypeF32: - store.asXmmMovRM(sseOpcodeMovss, v, mem) - case ssa.TypeF64: - store.asXmmMovRM(sseOpcodeMovsd, v, mem) - case ssa.TypeV128: - store.asXmmMovRM(sseOpcodeMovdqu, v, mem) - default: - panic("BUG") - } - cur = linkInstr(cur, store) - } - } - - // Before return, we need to restore the callee saved registers. - cur = m.restoreRegistersInExecutionContext(cur, execCtrPtr, calleeSavedVRegs) - - if argOverlapWithExecCtxOffset >= 0 { - // At this point execCtt is not used anymore, so we can finally store the - // result to the register which overlaps with the execution context pointer. - mem := newOperandMem(m.newAmodeImmReg(uint32(argOverlapWithExecCtxOffset), rspVReg)) - load := m.allocateInstr().asMov64MR(mem, execCtrPtr) - cur = linkInstr(cur, load) - } - - // Finally ready to return. 
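-	// revertRBPRSP mirrors setupRBPRSP at the top of this trampoline, restoring the
-	// caller's RBP and RSP before the final ret.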
- cur = m.revertRBPRSP(cur) - linkInstr(cur, m.allocateInstr().asRet()) - - m.encodeWithoutSSA(m.rootInstr) - return m.c.Buf() -} - -func (m *machine) saveRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction { - offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64() - for _, v := range regs { - store := m.allocateInstr() - mem := newOperandMem(m.newAmodeImmReg(uint32(offset), execCtx)) - switch v.RegType() { - case regalloc.RegTypeInt: - store.asMovRM(v, mem, 8) - case regalloc.RegTypeFloat: - store.asXmmMovRM(sseOpcodeMovdqu, v, mem) - default: - panic("BUG") - } - cur = linkInstr(cur, store) - offset += 16 // See execution context struct. Each register is 16 bytes-aligned unconditionally. - } - return cur -} - -func (m *machine) restoreRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction { - offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64() - for _, v := range regs { - load := m.allocateInstr() - mem := newOperandMem(m.newAmodeImmReg(uint32(offset), execCtx)) - switch v.RegType() { - case regalloc.RegTypeInt: - load.asMov64MR(mem, v) - case regalloc.RegTypeFloat: - load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v) - default: - panic("BUG") - } - cur = linkInstr(cur, load) - offset += 16 // See execution context struct. Each register is 16 bytes-aligned unconditionally. - } - return cur -} - -func (m *machine) storeReturnAddressAndExit(cur *instruction, execCtx regalloc.VReg) *instruction { - readRip := m.allocateInstr() - cur = linkInstr(cur, readRip) - - ripReg := r12VReg // Callee saved which is already saved. - saveRip := m.allocateInstr().asMovRM( - ripReg, - newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetGoCallReturnAddress.U32(), execCtx)), - 8, - ) - cur = linkInstr(cur, saveRip) - - exit := m.allocateExitSeq(execCtx) - cur = linkInstr(cur, exit) - - nop, l := m.allocateBrTarget() - cur = linkInstr(cur, nop) - readRip.asLEA(newOperandLabel(l), ripReg) - return cur -} - -// saveRequiredRegs is the set of registers that must be saved/restored during growing stack when there's insufficient -// stack space left. Basically this is the all allocatable registers except for RSP and RBP, and RAX which contains the -// execution context pointer. ExecCtx pointer is always the first argument so we don't need to save it. -var stackGrowSaveVRegs = []regalloc.VReg{ - rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg, - rcxVReg, rbxVReg, rsiVReg, rdiVReg, r8VReg, r9VReg, r10VReg, r11VReg, - xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg, - xmm0VReg, xmm1VReg, xmm2VReg, xmm3VReg, xmm4VReg, xmm5VReg, xmm6VReg, xmm7VReg, -} - -// CompileStackGrowCallSequence implements backend.Machine. -func (m *machine) CompileStackGrowCallSequence() []byte { - cur := m.allocateNop() - m.rootInstr = cur - - cur = m.setupRBPRSP(cur) - - // Execution context is always the first argument. - execCtrPtr := raxVReg - - // Save the callee saved and argument registers. - cur = m.saveRegistersInExecutionContext(cur, execCtrPtr, stackGrowSaveVRegs) - - // Load the exitCode to the register. - exitCodeReg := r12VReg // Already saved. 
- cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(wazevoapi.ExitCodeGrowStack), false)) - - saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtrPtr, exitCodeReg) - cur = linkInstr(cur, setExitCode) - cur = linkInstr(cur, saveRsp) - cur = linkInstr(cur, saveRbp) - - // Ready to exit the execution. - cur = m.storeReturnAddressAndExit(cur, execCtrPtr) - - // After the exit, restore the saved registers. - cur = m.restoreRegistersInExecutionContext(cur, execCtrPtr, stackGrowSaveVRegs) - - // Finally ready to return. - cur = m.revertRBPRSP(cur) - linkInstr(cur, m.allocateInstr().asRet()) - - m.encodeWithoutSSA(m.rootInstr) - return m.c.Buf() -} - -// insertStackBoundsCheck will insert the instructions after `cur` to check the -// stack bounds, and if there's no sufficient spaces required for the function, -// exit the execution and try growing it in Go world. -func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instruction) *instruction { - // add $requiredStackSize, %rsp ;; Temporarily update the sp. - // cmp ExecutionContextOffsetStackBottomPtr(%rax), %rsp ;; Compare the stack bottom and the sp. - // ja .ok - // sub $requiredStackSize, %rsp ;; Reverse the temporary update. - // pushq r15 ;; save the temporary. - // mov $requiredStackSize, %r15 - // mov %15, ExecutionContextOffsetStackGrowRequiredSize(%rax) ;; Set the required size in the execution context. - // popq r15 ;; restore the temporary. - // callq *ExecutionContextOffsetStackGrowCallTrampolineAddress(%rax) ;; Call the Go function to grow the stack. - // jmp .cont - // .ok: - // sub $requiredStackSize, %rsp ;; Reverse the temporary update. - // .cont: - cur = m.addRSP(-int32(requiredStackSize), cur) - cur = linkInstr(cur, m.allocateInstr().asCmpRmiR(true, - newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackBottomPtr.U32(), raxVReg)), - rspVReg, true)) - - ja := m.allocateInstr() - cur = linkInstr(cur, ja) - - cur = m.addRSP(int32(requiredStackSize), cur) - - // Save the temporary. - - cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r15VReg))) - // Load the required size to the temporary. - cur = linkInstr(cur, m.allocateInstr().asImm(r15VReg, uint64(requiredStackSize), true)) - // Set the required size in the execution context. - cur = linkInstr(cur, m.allocateInstr().asMovRM(r15VReg, - newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.U32(), raxVReg)), 8)) - // Restore the temporary. - cur = linkInstr(cur, m.allocateInstr().asPop64(r15VReg)) - // Call the Go function to grow the stack. - cur = linkInstr(cur, m.allocateInstr().asCallIndirect(newOperandMem(m.newAmodeImmReg( - wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.U32(), raxVReg)), nil)) - // Jump to the continuation. - jmpToCont := m.allocateInstr() - cur = linkInstr(cur, jmpToCont) - - // .ok: - okInstr, ok := m.allocateBrTarget() - cur = linkInstr(cur, okInstr) - ja.asJmpIf(condNBE, newOperandLabel(ok)) - // On the ok path, we only need to reverse the temporary update. 
- cur = m.addRSP(int32(requiredStackSize), cur) - - // .cont: - contInstr, cont := m.allocateBrTarget() - cur = linkInstr(cur, contInstr) - jmpToCont.asJmp(newOperandLabel(cont)) - - return cur -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/cond.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/cond.go deleted file mode 100644 index 75cbeab75..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/cond.go +++ /dev/null @@ -1,168 +0,0 @@ -package amd64 - -import ( - "fmt" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -type cond byte - -const ( - // condO represents (overflow) condition. - condO cond = iota - // condNO represents (no overflow) condition. - condNO - // condB represents (< unsigned) condition. - condB - // condNB represents (>= unsigned) condition. - condNB - // condZ represents (zero) condition. - condZ - // condNZ represents (not-zero) condition. - condNZ - // condBE represents (<= unsigned) condition. - condBE - // condNBE represents (> unsigned) condition. - condNBE - // condS represents (negative) condition. - condS - // condNS represents (not-negative) condition. - condNS - // condP represents (parity) condition. - condP - // condNP represents (not parity) condition. - condNP - // condL represents (< signed) condition. - condL - // condNL represents (>= signed) condition. - condNL - // condLE represents (<= signed) condition. - condLE - // condNLE represents (> signed) condition. - condNLE - - condInvalid -) - -func (c cond) String() string { - switch c { - case condO: - return "o" - case condNO: - return "no" - case condB: - return "b" - case condNB: - return "nb" - case condZ: - return "z" - case condNZ: - return "nz" - case condBE: - return "be" - case condNBE: - return "nbe" - case condS: - return "s" - case condNS: - return "ns" - case condL: - return "l" - case condNL: - return "nl" - case condLE: - return "le" - case condNLE: - return "nle" - case condP: - return "p" - case condNP: - return "np" - default: - panic("unreachable") - } -} - -func condFromSSAIntCmpCond(origin ssa.IntegerCmpCond) cond { - switch origin { - case ssa.IntegerCmpCondEqual: - return condZ - case ssa.IntegerCmpCondNotEqual: - return condNZ - case ssa.IntegerCmpCondSignedLessThan: - return condL - case ssa.IntegerCmpCondSignedGreaterThanOrEqual: - return condNL - case ssa.IntegerCmpCondSignedGreaterThan: - return condNLE - case ssa.IntegerCmpCondSignedLessThanOrEqual: - return condLE - case ssa.IntegerCmpCondUnsignedLessThan: - return condB - case ssa.IntegerCmpCondUnsignedGreaterThanOrEqual: - return condNB - case ssa.IntegerCmpCondUnsignedGreaterThan: - return condNBE - case ssa.IntegerCmpCondUnsignedLessThanOrEqual: - return condBE - default: - panic("unreachable") - } -} - -func condFromSSAFloatCmpCond(origin ssa.FloatCmpCond) cond { - switch origin { - case ssa.FloatCmpCondGreaterThanOrEqual: - return condNB - case ssa.FloatCmpCondGreaterThan: - return condNBE - case ssa.FloatCmpCondEqual, ssa.FloatCmpCondNotEqual, ssa.FloatCmpCondLessThan, ssa.FloatCmpCondLessThanOrEqual: - panic(fmt.Sprintf("cond %s must be treated as a special case", origin)) - default: - panic("unreachable") - } -} - -func (c cond) encoding() byte { - return byte(c) -} - -func (c cond) invert() cond { - switch c { - case condO: - return condNO - case condNO: - return condO - case condB: - return condNB - case condNB: - return condB - case condZ: - return condNZ - case condNZ: - 
return condZ - case condBE: - return condNBE - case condNBE: - return condBE - case condS: - return condNS - case condNS: - return condS - case condP: - return condNP - case condNP: - return condP - case condL: - return condNL - case condNL: - return condL - case condLE: - return condNLE - case condNLE: - return condLE - default: - panic("unreachable") - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/ext.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/ext.go deleted file mode 100644 index 5e731e822..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/ext.go +++ /dev/null @@ -1,35 +0,0 @@ -package amd64 - -// extMode represents the mode of extension in movzx/movsx. -type extMode byte - -const ( - // extModeBL represents Byte -> Longword. - extModeBL extMode = iota - // extModeBQ represents Byte -> Quadword. - extModeBQ - // extModeWL represents Word -> Longword. - extModeWL - // extModeWQ represents Word -> Quadword. - extModeWQ - // extModeLQ represents Longword -> Quadword. - extModeLQ -) - -// String implements fmt.Stringer. -func (e extMode) String() string { - switch e { - case extModeBL: - return "bl" - case extModeBQ: - return "bq" - case extModeWL: - return "wl" - case extModeWQ: - return "wq" - case extModeLQ: - return "lq" - default: - panic("BUG: invalid ext mode") - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go deleted file mode 100644 index 6a3e58f51..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go +++ /dev/null @@ -1,2447 +0,0 @@ -package amd64 - -import ( - "fmt" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -type instruction struct { - prev, next *instruction - op1, op2 operand - u1, u2 uint64 - b1 bool - addedBeforeRegAlloc bool - kind instructionKind -} - -// IsCall implements regalloc.Instr. -func (i *instruction) IsCall() bool { return i.kind == call } - -// IsIndirectCall implements regalloc.Instr. -func (i *instruction) IsIndirectCall() bool { return i.kind == callIndirect } - -// IsReturn implements regalloc.Instr. -func (i *instruction) IsReturn() bool { return i.kind == ret } - -// String implements regalloc.Instr. 
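-// The text is used for debug printing and panic messages; operands are printed
-// source-first, destination-second, roughly following AT&T syntax.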
-func (i *instruction) String() string { - switch i.kind { - case nop0: - return "nop" - case sourceOffsetInfo: - return fmt.Sprintf("source_offset_info %d", i.u1) - case ret: - return "ret" - case imm: - if i.b1 { - return fmt.Sprintf("movabsq $%d, %s", int64(i.u1), i.op2.format(true)) - } else { - return fmt.Sprintf("movl $%d, %s", int32(i.u1), i.op2.format(false)) - } - case aluRmiR: - return fmt.Sprintf("%s %s, %s", aluRmiROpcode(i.u1), i.op1.format(i.b1), i.op2.format(i.b1)) - case movRR: - if i.b1 { - return fmt.Sprintf("movq %s, %s", i.op1.format(true), i.op2.format(true)) - } else { - return fmt.Sprintf("movl %s, %s", i.op1.format(false), i.op2.format(false)) - } - case xmmRmR: - return fmt.Sprintf("%s %s, %s", sseOpcode(i.u1), i.op1.format(false), i.op2.format(false)) - case gprToXmm: - return fmt.Sprintf("%s %s, %s", sseOpcode(i.u1), i.op1.format(i.b1), i.op2.format(i.b1)) - case xmmUnaryRmR: - return fmt.Sprintf("%s %s, %s", sseOpcode(i.u1), i.op1.format(false), i.op2.format(false)) - case xmmUnaryRmRImm: - return fmt.Sprintf("%s $%d, %s, %s", sseOpcode(i.u1), roundingMode(i.u2), i.op1.format(false), i.op2.format(false)) - case unaryRmR: - var suffix string - if i.b1 { - suffix = "q" - } else { - suffix = "l" - } - return fmt.Sprintf("%s%s %s, %s", unaryRmROpcode(i.u1), suffix, i.op1.format(i.b1), i.op2.format(i.b1)) - case not: - var op string - if i.b1 { - op = "notq" - } else { - op = "notl" - } - return fmt.Sprintf("%s %s", op, i.op1.format(i.b1)) - case neg: - var op string - if i.b1 { - op = "negq" - } else { - op = "negl" - } - return fmt.Sprintf("%s %s", op, i.op1.format(i.b1)) - case div: - var prefix string - var op string - if i.b1 { - op = "divq" - } else { - op = "divl" - } - if i.u1 != 0 { - prefix = "i" - } - return fmt.Sprintf("%s%s %s", prefix, op, i.op1.format(i.b1)) - case mulHi: - signed, _64 := i.u1 != 0, i.b1 - var op string - switch { - case signed && _64: - op = "imulq" - case !signed && _64: - op = "mulq" - case signed && !_64: - op = "imull" - case !signed && !_64: - op = "mull" - } - return fmt.Sprintf("%s %s", op, i.op1.format(i.b1)) - case signExtendData: - var op string - if i.b1 { - op = "cqo" - } else { - op = "cdq" - } - return op - case movzxRmR: - return fmt.Sprintf("movzx.%s %s, %s", extMode(i.u1), i.op1.format(true), i.op2.format(true)) - case mov64MR: - return fmt.Sprintf("movq %s, %s", i.op1.format(true), i.op2.format(true)) - case lea: - return fmt.Sprintf("lea %s, %s", i.op1.format(true), i.op2.format(true)) - case movsxRmR: - return fmt.Sprintf("movsx.%s %s, %s", extMode(i.u1), i.op1.format(true), i.op2.format(true)) - case movRM: - var suffix string - switch i.u1 { - case 1: - suffix = "b" - case 2: - suffix = "w" - case 4: - suffix = "l" - case 8: - suffix = "q" - } - return fmt.Sprintf("mov.%s %s, %s", suffix, i.op1.format(true), i.op2.format(true)) - case shiftR: - var suffix string - if i.b1 { - suffix = "q" - } else { - suffix = "l" - } - return fmt.Sprintf("%s%s %s, %s", shiftROp(i.u1), suffix, i.op1.format(false), i.op2.format(i.b1)) - case xmmRmiReg: - return fmt.Sprintf("%s %s, %s", sseOpcode(i.u1), i.op1.format(true), i.op2.format(true)) - case cmpRmiR: - var op, suffix string - if i.u1 != 0 { - op = "cmp" - } else { - op = "test" - } - if i.b1 { - suffix = "q" - } else { - suffix = "l" - } - if op == "test" && i.op1.kind == operandKindMem { - // Print consistently with AT&T syntax. 
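-			// i.e. when the first operand is a memory location, print the register operand
-			// first so the result reads as "test reg, mem".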
- return fmt.Sprintf("%s%s %s, %s", op, suffix, i.op2.format(i.b1), i.op1.format(i.b1)) - } - return fmt.Sprintf("%s%s %s, %s", op, suffix, i.op1.format(i.b1), i.op2.format(i.b1)) - case setcc: - return fmt.Sprintf("set%s %s", cond(i.u1), i.op2.format(true)) - case cmove: - var suffix string - if i.b1 { - suffix = "q" - } else { - suffix = "l" - } - return fmt.Sprintf("cmov%s%s %s, %s", cond(i.u1), suffix, i.op1.format(i.b1), i.op2.format(i.b1)) - case push64: - return fmt.Sprintf("pushq %s", i.op1.format(true)) - case pop64: - return fmt.Sprintf("popq %s", i.op1.format(true)) - case xmmMovRM: - return fmt.Sprintf("%s %s, %s", sseOpcode(i.u1), i.op1.format(true), i.op2.format(true)) - case xmmLoadConst: - panic("TODO") - case xmmToGpr: - return fmt.Sprintf("%s %s, %s", sseOpcode(i.u1), i.op1.format(i.b1), i.op2.format(i.b1)) - case cvtUint64ToFloatSeq: - panic("TODO") - case cvtFloatToSintSeq: - panic("TODO") - case cvtFloatToUintSeq: - panic("TODO") - case xmmMinMaxSeq: - panic("TODO") - case xmmCmpRmR: - return fmt.Sprintf("%s %s, %s", sseOpcode(i.u1), i.op1.format(false), i.op2.format(false)) - case xmmRmRImm: - op := sseOpcode(i.u1) - r1, r2 := i.op1.format(op == sseOpcodePextrq || op == sseOpcodePinsrq), - i.op2.format(op == sseOpcodePextrq || op == sseOpcodePinsrq) - return fmt.Sprintf("%s $%d, %s, %s", op, i.u2, r1, r2) - case jmp: - return fmt.Sprintf("jmp %s", i.op1.format(true)) - case jmpIf: - return fmt.Sprintf("j%s %s", cond(i.u1), i.op1.format(true)) - case jmpTableIsland: - return fmt.Sprintf("jump_table_island: jmp_table_index=%d", i.u1) - case exitSequence: - return fmt.Sprintf("exit_sequence %s", i.op1.format(true)) - case ud2: - return "ud2" - case call: - return fmt.Sprintf("call %s", ssa.FuncRef(i.u1)) - case callIndirect: - return fmt.Sprintf("callq *%s", i.op1.format(true)) - case xchg: - var suffix string - switch i.u1 { - case 1: - suffix = "b" - case 2: - suffix = "w" - case 4: - suffix = "l" - case 8: - suffix = "q" - } - return fmt.Sprintf("xchg.%s %s, %s", suffix, i.op1.format(true), i.op2.format(true)) - case zeros: - return fmt.Sprintf("xor %s, %s", i.op2.format(true), i.op2.format(true)) - case fcvtToSintSequence: - execCtx, src, tmpGp, tmpGp2, tmpXmm, src64, dst64, sat := i.fcvtToSintSequenceData() - return fmt.Sprintf( - "fcvtToSintSequence execCtx=%s, src=%s, tmpGp=%s, tmpGp2=%s, tmpXmm=%s, src64=%v, dst64=%v, sat=%v", - formatVRegSized(execCtx, true), - formatVRegSized(src, true), - formatVRegSized(tmpGp, true), - formatVRegSized(tmpGp2, true), - formatVRegSized(tmpXmm, true), src64, dst64, sat) - case fcvtToUintSequence: - execCtx, src, tmpGp, tmpGp2, tmpXmm, tmpXmm2, src64, dst64, sat := i.fcvtToUintSequenceData() - return fmt.Sprintf( - "fcvtToUintSequence execCtx=%s, src=%s, tmpGp=%s, tmpGp2=%s, tmpXmm=%s, tmpXmm2=%s, src64=%v, dst64=%v, sat=%v", - formatVRegSized(execCtx, true), - formatVRegSized(src, true), - formatVRegSized(tmpGp, true), - formatVRegSized(tmpGp2, true), - formatVRegSized(tmpXmm, true), - formatVRegSized(tmpXmm2, true), src64, dst64, sat) - case idivRemSequence: - execCtx, divisor, tmpGp, isDiv, signed, _64 := i.idivRemSequenceData() - return fmt.Sprintf("idivRemSequence execCtx=%s, divisor=%s, tmpGp=%s, isDiv=%v, signed=%v, _64=%v", - formatVRegSized(execCtx, true), formatVRegSized(divisor, _64), formatVRegSized(tmpGp, _64), isDiv, signed, _64) - case defineUninitializedReg: - return fmt.Sprintf("defineUninitializedReg %s", i.op2.format(true)) - case xmmCMov: - return fmt.Sprintf("xmmcmov%s %s, %s", cond(i.u1), i.op1.format(true), 
i.op2.format(true)) - case blendvpd: - return fmt.Sprintf("blendvpd %s, %s, %%xmm0", i.op1.format(false), i.op2.format(false)) - case mfence: - return "mfence" - case lockcmpxchg: - var suffix string - switch i.u1 { - case 1: - suffix = "b" - case 2: - suffix = "w" - case 4: - suffix = "l" - case 8: - suffix = "q" - } - return fmt.Sprintf("lock cmpxchg.%s %s, %s", suffix, i.op1.format(true), i.op2.format(true)) - case lockxadd: - var suffix string - switch i.u1 { - case 1: - suffix = "b" - case 2: - suffix = "w" - case 4: - suffix = "l" - case 8: - suffix = "q" - } - return fmt.Sprintf("lock xadd.%s %s, %s", suffix, i.op1.format(true), i.op2.format(true)) - - case nopUseReg: - return fmt.Sprintf("nop_use_reg %s", i.op1.format(true)) - - default: - panic(fmt.Sprintf("BUG: %d", int(i.kind))) - } -} - -// Defs implements regalloc.Instr. -func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg { - *regs = (*regs)[:0] - switch dk := defKinds[i.kind]; dk { - case defKindNone: - case defKindOp2: - *regs = append(*regs, i.op2.reg()) - case defKindCall: - _, _, retIntRealRegs, retFloatRealRegs, _ := backend.ABIInfoFromUint64(i.u2) - for i := byte(0); i < retIntRealRegs; i++ { - *regs = append(*regs, regInfo.RealRegToVReg[intArgResultRegs[i]]) - } - for i := byte(0); i < retFloatRealRegs; i++ { - *regs = append(*regs, regInfo.RealRegToVReg[floatArgResultRegs[i]]) - } - case defKindDivRem: - _, _, _, isDiv, _, _ := i.idivRemSequenceData() - if isDiv { - *regs = append(*regs, raxVReg) - } else { - *regs = append(*regs, rdxVReg) - } - default: - panic(fmt.Sprintf("BUG: invalid defKind \"%s\" for %s", dk, i)) - } - return *regs -} - -// Uses implements regalloc.Instr. -func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { - *regs = (*regs)[:0] - switch uk := useKinds[i.kind]; uk { - case useKindNone: - case useKindOp1Op2Reg, useKindOp1RegOp2: - opAny, opReg := &i.op1, &i.op2 - if uk == useKindOp1RegOp2 { - opAny, opReg = opReg, opAny - } - // The destination operand (op2) can be only reg, - // the source operand (op1) can be imm32, reg or mem. 
- switch opAny.kind { - case operandKindReg: - *regs = append(*regs, opAny.reg()) - case operandKindMem: - opAny.addressMode().uses(regs) - case operandKindImm32: - default: - panic(fmt.Sprintf("BUG: invalid operand: %s", i)) - } - if opReg.kind != operandKindReg { - panic(fmt.Sprintf("BUG: invalid operand: %s", i)) - } - *regs = append(*regs, opReg.reg()) - case useKindOp1: - op := i.op1 - switch op.kind { - case operandKindReg: - *regs = append(*regs, op.reg()) - case operandKindMem: - op.addressMode().uses(regs) - case operandKindImm32, operandKindLabel: - default: - panic(fmt.Sprintf("BUG: invalid operand: %s", i)) - } - case useKindCallInd: - op := i.op1 - switch op.kind { - case operandKindReg: - *regs = append(*regs, op.reg()) - case operandKindMem: - op.addressMode().uses(regs) - default: - panic(fmt.Sprintf("BUG: invalid operand: %s", i)) - } - fallthrough - case useKindCall: - argIntRealRegs, argFloatRealRegs, _, _, _ := backend.ABIInfoFromUint64(i.u2) - for i := byte(0); i < argIntRealRegs; i++ { - *regs = append(*regs, regInfo.RealRegToVReg[intArgResultRegs[i]]) - } - for i := byte(0); i < argFloatRealRegs; i++ { - *regs = append(*regs, regInfo.RealRegToVReg[floatArgResultRegs[i]]) - } - case useKindFcvtToSintSequence: - execCtx, src, tmpGp, tmpGp2, tmpXmm, _, _, _ := i.fcvtToSintSequenceData() - *regs = append(*regs, execCtx, src, tmpGp, tmpGp2, tmpXmm) - case useKindFcvtToUintSequence: - execCtx, src, tmpGp, tmpGp2, tmpXmm, tmpXmm2, _, _, _ := i.fcvtToUintSequenceData() - *regs = append(*regs, execCtx, src, tmpGp, tmpGp2, tmpXmm, tmpXmm2) - case useKindDivRem: - execCtx, divisor, tmpGp, _, _, _ := i.idivRemSequenceData() - // idiv uses rax and rdx as implicit operands. - *regs = append(*regs, raxVReg, rdxVReg, execCtx, divisor, tmpGp) - case useKindBlendvpd: - *regs = append(*regs, xmm0VReg) - - opAny, opReg := &i.op1, &i.op2 - switch opAny.kind { - case operandKindReg: - *regs = append(*regs, opAny.reg()) - case operandKindMem: - opAny.addressMode().uses(regs) - default: - panic(fmt.Sprintf("BUG: invalid operand: %s", i)) - } - if opReg.kind != operandKindReg { - panic(fmt.Sprintf("BUG: invalid operand: %s", i)) - } - *regs = append(*regs, opReg.reg()) - - case useKindRaxOp1RegOp2: - opReg, opAny := &i.op1, &i.op2 - *regs = append(*regs, raxVReg, opReg.reg()) - switch opAny.kind { - case operandKindReg: - *regs = append(*regs, opAny.reg()) - case operandKindMem: - opAny.addressMode().uses(regs) - default: - panic(fmt.Sprintf("BUG: invalid operand: %s", i)) - } - if opReg.kind != operandKindReg { - panic(fmt.Sprintf("BUG: invalid operand: %s", i)) - } - - default: - panic(fmt.Sprintf("BUG: invalid useKind %s for %s", uk, i)) - } - return *regs -} - -// AssignUse implements regalloc.Instr. 
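-// index refers to the position of the register within the slice returned by Uses,
-// so the two methods must walk the operands in the same order.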
-func (i *instruction) AssignUse(index int, v regalloc.VReg) { - switch uk := useKinds[i.kind]; uk { - case useKindNone: - case useKindCallInd: - if index != 0 { - panic("BUG") - } - op := &i.op1 - switch op.kind { - case operandKindReg: - op.setReg(v) - case operandKindMem: - op.addressMode().assignUses(index, v) - default: - panic("BUG") - } - case useKindOp1Op2Reg, useKindOp1RegOp2: - op, opMustBeReg := &i.op1, &i.op2 - if uk == useKindOp1RegOp2 { - op, opMustBeReg = opMustBeReg, op - } - switch op.kind { - case operandKindReg: - if index == 0 { - op.setReg(v) - } else if index == 1 { - opMustBeReg.setReg(v) - } else { - panic("BUG") - } - case operandKindMem: - nregs := op.addressMode().nregs() - if index < nregs { - op.addressMode().assignUses(index, v) - } else if index == nregs { - opMustBeReg.setReg(v) - } else { - panic("BUG") - } - case operandKindImm32: - if index == 0 { - opMustBeReg.setReg(v) - } else { - panic("BUG") - } - default: - panic(fmt.Sprintf("BUG: invalid operand pair: %s", i)) - } - case useKindOp1: - op := &i.op1 - switch op.kind { - case operandKindReg: - if index != 0 { - panic("BUG") - } - op.setReg(v) - case operandKindMem: - op.addressMode().assignUses(index, v) - default: - panic(fmt.Sprintf("BUG: invalid operand: %s", i)) - } - case useKindFcvtToSintSequence: - switch index { - case 0: - i.op1.addressMode().base = v - case 1: - i.op1.addressMode().index = v - case 2: - i.op2.addressMode().base = v - case 3: - i.op2.addressMode().index = v - case 4: - i.u1 = uint64(v) - default: - panic("BUG") - } - case useKindFcvtToUintSequence: - switch index { - case 0: - i.op1.addressMode().base = v - case 1: - i.op1.addressMode().index = v - case 2: - i.op2.addressMode().base = v - case 3: - i.op2.addressMode().index = v - case 4: - i.u1 = uint64(v) - case 5: - i.u2 = uint64(v) - default: - panic("BUG") - } - case useKindDivRem: - switch index { - case 0: - if v != raxVReg { - panic("BUG") - } - case 1: - if v != rdxVReg { - panic("BUG") - } - case 2: - i.op1.setReg(v) - case 3: - i.op2.setReg(v) - case 4: - i.u1 = uint64(v) - default: - panic("BUG") - } - case useKindBlendvpd: - op, opMustBeReg := &i.op1, &i.op2 - if index == 0 { - if v.RealReg() != xmm0 { - panic("BUG") - } - } else { - switch op.kind { - case operandKindReg: - switch index { - case 1: - op.setReg(v) - case 2: - opMustBeReg.setReg(v) - default: - panic("BUG") - } - case operandKindMem: - nregs := op.addressMode().nregs() - index-- - if index < nregs { - op.addressMode().assignUses(index, v) - } else if index == nregs { - opMustBeReg.setReg(v) - } else { - panic("BUG") - } - default: - panic(fmt.Sprintf("BUG: invalid operand pair: %s", i)) - } - } - - case useKindRaxOp1RegOp2: - switch index { - case 0: - if v.RealReg() != rax { - panic("BUG") - } - case 1: - i.op1.setReg(v) - default: - op := &i.op2 - switch op.kind { - case operandKindReg: - switch index { - case 1: - op.setReg(v) - case 2: - op.setReg(v) - default: - panic("BUG") - } - case operandKindMem: - nregs := op.addressMode().nregs() - index -= 2 - if index < nregs { - op.addressMode().assignUses(index, v) - } else if index == nregs { - op.setReg(v) - } else { - panic("BUG") - } - default: - panic(fmt.Sprintf("BUG: invalid operand pair: %s", i)) - } - } - default: - panic(fmt.Sprintf("BUG: invalid useKind %s for %s", uk, i)) - } -} - -// AssignDef implements regalloc.Instr. 
-func (i *instruction) AssignDef(reg regalloc.VReg) { - switch dk := defKinds[i.kind]; dk { - case defKindNone: - case defKindOp2: - i.op2.setReg(reg) - default: - panic(fmt.Sprintf("BUG: invalid defKind \"%s\" for %s", dk, i)) - } -} - -// IsCopy implements regalloc.Instr. -func (i *instruction) IsCopy() bool { - k := i.kind - if k == movRR { - return true - } - if k == xmmUnaryRmR { - if i.op1.kind == operandKindReg { - sse := sseOpcode(i.u1) - return sse == sseOpcodeMovss || sse == sseOpcodeMovsd || sse == sseOpcodeMovdqu - } - } - return false -} - -func resetInstruction(i *instruction) { - *i = instruction{} -} - -func (i *instruction) asNop0WithLabel(label label) *instruction { //nolint - i.kind = nop0 - i.u1 = uint64(label) - return i -} - -func (i *instruction) nop0Label() label { - return label(i.u1) -} - -type instructionKind byte - -const ( - nop0 instructionKind = iota + 1 - - // Integer arithmetic/bit-twiddling: (add sub and or xor mul, etc.) (32 64) (reg addr imm) reg - aluRmiR - - // Instructions on GPR that only read src and defines dst (dst is not modified): bsr, etc. - unaryRmR - - // Bitwise not - not - - // Integer negation - neg - - // Integer quotient and remainder: (div idiv) $rax $rdx (reg addr) - div - - // The high bits (RDX) of a (un)signed multiply: RDX:RAX := RAX * rhs. - mulHi - - // Do a sign-extend based on the sign of the value in rax into rdx: (cwd cdq cqo) - // or al into ah: (cbw) - signExtendData - - // Constant materialization: (imm32 imm64) reg. - // Either: movl $imm32, %reg32 or movabsq $imm64, %reg64. - imm - - // GPR to GPR move: mov (64 32) reg reg. - movRR - - // movzxRmR is zero-extended loads or move (R to R), except for 64 bits: movz (bl bq wl wq lq) addr reg. - // Note that the lq variant doesn't really exist since the default zero-extend rule makes it - // unnecessary. For that case we emit the equivalent "movl AM, reg32". - movzxRmR - - // mov64MR is a plain 64-bit integer load, since movzxRmR can't represent that. - mov64MR - - // Loads the memory address of addr into dst. - lea - - // Sign-extended loads and moves: movs (bl bq wl wq lq) addr reg. - movsxRmR - - // Integer stores: mov (b w l q) reg addr. - movRM - - // Arithmetic shifts: (shl shr sar) (b w l q) imm reg. - shiftR - - // Arithmetic SIMD shifts. - xmmRmiReg - - // Integer comparisons/tests: cmp or test (b w l q) (reg addr imm) reg. - cmpRmiR - - // Materializes the requested condition code in the destination reg. - setcc - - // Integer conditional move. - // Overwrites the destination register. - cmove - - // pushq (reg addr imm) - push64 - - // popq reg - pop64 - - // XMM (scalar or vector) binary op: (add sub and or xor mul adc? sbb?) (32 64) (reg addr) reg - xmmRmR - - // XMM (scalar or vector) unary op: mov between XMM registers (32 64) (reg addr) reg. - // - // This differs from xmmRmR in that the dst register of xmmUnaryRmR is not used in the - // computation of the instruction dst value and so does not have to be a previously valid - // value. This is characteristic of mov instructions. - xmmUnaryRmR - - // XMM (scalar or vector) unary op with immediate: roundss, roundsd, etc. - // - // This differs from XMM_RM_R_IMM in that the dst register of - // XmmUnaryRmRImm is not used in the computation of the instruction dst - // value and so does not have to be a previously valid value. 
- xmmUnaryRmRImm - - // XMM (scalar or vector) unary op (from xmm to mem): stores, movd, movq - xmmMovRM - - // XMM (vector) unary op (to move a constant value into an xmm register): movups - xmmLoadConst - - // XMM (scalar) unary op (from xmm to integer reg): movd, movq, cvtts{s,d}2si - xmmToGpr - - // XMM (scalar) unary op (from integer to float reg): movd, movq, cvtsi2s{s,d} - gprToXmm - - // Converts an unsigned int64 to a float32/float64. - cvtUint64ToFloatSeq - - // Converts a scalar xmm to a signed int32/int64. - cvtFloatToSintSeq - - // Converts a scalar xmm to an unsigned int32/int64. - cvtFloatToUintSeq - - // A sequence to compute min/max with the proper NaN semantics for xmm registers. - xmmMinMaxSeq - - // Float comparisons/tests: cmp (b w l q) (reg addr imm) reg. - xmmCmpRmR - - // A binary XMM instruction with an 8-bit immediate: e.g. cmp (ps pd) imm (reg addr) reg - xmmRmRImm - - // Direct call: call simm32. - // Note that the offset is the relative to the *current RIP*, which points to the first byte of the next instruction. - call - - // Indirect call: callq (reg mem). - callIndirect - - // Return. - ret - - // Jump: jmp (reg, mem, imm32 or label) - jmp - - // Jump conditionally: jcond cond label. - jmpIf - - // jmpTableIsland is to emit the jump table. - jmpTableIsland - - // exitSequence exits the execution and go back to the Go world. - exitSequence - - // An instruction that will always trigger the illegal instruction exception. - ud2 - - // xchg is described in https://www.felixcloutier.com/x86/xchg. - // This instruction uses two operands, where one of them can be a memory address, and swaps their values. - // If the dst is a memory address, the execution is atomic. - xchg - - // lockcmpxchg is the cmpxchg instruction https://www.felixcloutier.com/x86/cmpxchg with a lock prefix. - lockcmpxchg - - // zeros puts zeros into the destination register. This is implemented as xor reg, reg for - // either integer or XMM registers. The reason why we have this instruction instead of using aluRmiR - // is that it requires the already-defined registers. From reg alloc's perspective, this defines - // the destination register and takes no inputs. - zeros - - // sourceOffsetInfo is a dummy instruction to emit source offset info. - // The existence of this instruction does not affect the execution. - sourceOffsetInfo - - // defineUninitializedReg is a no-op instruction that defines a register without a defining instruction. - defineUninitializedReg - - // fcvtToSintSequence is a sequence of instructions to convert a float to a signed integer. - fcvtToSintSequence - - // fcvtToUintSequence is a sequence of instructions to convert a float to an unsigned integer. - fcvtToUintSequence - - // xmmCMov is a conditional move instruction for XMM registers. Lowered after register allocation. - xmmCMov - - // idivRemSequence is a sequence of instructions to compute both the quotient and remainder of a division. - idivRemSequence - - // blendvpd is https://www.felixcloutier.com/x86/blendvpd. - blendvpd - - // mfence is https://www.felixcloutier.com/x86/mfence - mfence - - // lockxadd is xadd https://www.felixcloutier.com/x86/xadd with a lock prefix. - lockxadd - - // nopUseReg is a meta instruction that uses one register and does nothing. 
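-	// From the register allocator's point of view it merely marks the register as used
-	// at that point, keeping the value alive without doing any real work.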
- nopUseReg - - instrMax -) - -func (i *instruction) asMFence() *instruction { - i.kind = mfence - return i -} - -func (i *instruction) asNopUseReg(r regalloc.VReg) *instruction { - i.kind = nopUseReg - i.op1 = newOperandReg(r) - return i -} - -func (i *instruction) asIdivRemSequence(execCtx, divisor, tmpGp regalloc.VReg, isDiv, signed, _64 bool) *instruction { - i.kind = idivRemSequence - i.op1 = newOperandReg(execCtx) - i.op2 = newOperandReg(divisor) - i.u1 = uint64(tmpGp) - if isDiv { - i.u2 |= 1 - } - if signed { - i.u2 |= 2 - } - if _64 { - i.u2 |= 4 - } - return i -} - -func (i *instruction) idivRemSequenceData() ( - execCtx, divisor, tmpGp regalloc.VReg, isDiv, signed, _64 bool, -) { - if i.kind != idivRemSequence { - panic("BUG") - } - return i.op1.reg(), i.op2.reg(), regalloc.VReg(i.u1), i.u2&1 != 0, i.u2&2 != 0, i.u2&4 != 0 -} - -func (i *instruction) asXmmCMov(cc cond, x operand, rd regalloc.VReg, size byte) *instruction { - i.kind = xmmCMov - i.op1 = x - i.op2 = newOperandReg(rd) - i.u1 = uint64(cc) - i.u2 = uint64(size) - return i -} - -func (i *instruction) asDefineUninitializedReg(r regalloc.VReg) *instruction { - i.kind = defineUninitializedReg - i.op2 = newOperandReg(r) - return i -} - -func (m *machine) allocateFcvtToUintSequence( - execCtx, src, tmpGp, tmpGp2, tmpXmm, tmpXmm2 regalloc.VReg, - src64, dst64, sat bool, -) *instruction { - i := m.allocateInstr() - i.kind = fcvtToUintSequence - op1a := m.amodePool.Allocate() - op2a := m.amodePool.Allocate() - i.op1 = newOperandMem(op1a) - i.op2 = newOperandMem(op2a) - if src64 { - op1a.imm32 = 1 - } else { - op1a.imm32 = 0 - } - if dst64 { - op1a.imm32 |= 2 - } - if sat { - op1a.imm32 |= 4 - } - - op1a.base = execCtx - op1a.index = src - op2a.base = tmpGp - op2a.index = tmpGp2 - i.u1 = uint64(tmpXmm) - i.u2 = uint64(tmpXmm2) - return i -} - -func (i *instruction) fcvtToUintSequenceData() ( - execCtx, src, tmpGp, tmpGp2, tmpXmm, tmpXmm2 regalloc.VReg, src64, dst64, sat bool, -) { - if i.kind != fcvtToUintSequence { - panic("BUG") - } - op1a := i.op1.addressMode() - op2a := i.op2.addressMode() - return op1a.base, op1a.index, op2a.base, op2a.index, regalloc.VReg(i.u1), regalloc.VReg(i.u2), - op1a.imm32&1 != 0, op1a.imm32&2 != 0, op1a.imm32&4 != 0 -} - -func (m *machine) allocateFcvtToSintSequence( - execCtx, src, tmpGp, tmpGp2, tmpXmm regalloc.VReg, - src64, dst64, sat bool, -) *instruction { - i := m.allocateInstr() - i.kind = fcvtToSintSequence - op1a := m.amodePool.Allocate() - op2a := m.amodePool.Allocate() - i.op1 = newOperandMem(op1a) - i.op2 = newOperandMem(op2a) - op1a.base = execCtx - op1a.index = src - op2a.base = tmpGp - op2a.index = tmpGp2 - i.u1 = uint64(tmpXmm) - if src64 { - i.u2 = 1 - } else { - i.u2 = 0 - } - if dst64 { - i.u2 |= 2 - } - if sat { - i.u2 |= 4 - } - return i -} - -func (i *instruction) fcvtToSintSequenceData() ( - execCtx, src, tmpGp, tmpGp2, tmpXmm regalloc.VReg, src64, dst64, sat bool, -) { - if i.kind != fcvtToSintSequence { - panic("BUG") - } - op1a := i.op1.addressMode() - op2a := i.op2.addressMode() - return op1a.base, op1a.index, op2a.base, op2a.index, regalloc.VReg(i.u1), - i.u2&1 != 0, i.u2&2 != 0, i.u2&4 != 0 -} - -func (k instructionKind) String() string { - switch k { - case nop0: - return "nop" - case ret: - return "ret" - case imm: - return "imm" - case aluRmiR: - return "aluRmiR" - case movRR: - return "movRR" - case xmmRmR: - return "xmmRmR" - case gprToXmm: - return "gprToXmm" - case xmmUnaryRmR: - return "xmmUnaryRmR" - case xmmUnaryRmRImm: - return "xmmUnaryRmRImm" - case 
unaryRmR: - return "unaryRmR" - case not: - return "not" - case neg: - return "neg" - case div: - return "div" - case mulHi: - return "mulHi" - case signExtendData: - return "signExtendData" - case movzxRmR: - return "movzxRmR" - case mov64MR: - return "mov64MR" - case lea: - return "lea" - case movsxRmR: - return "movsxRmR" - case movRM: - return "movRM" - case shiftR: - return "shiftR" - case xmmRmiReg: - return "xmmRmiReg" - case cmpRmiR: - return "cmpRmiR" - case setcc: - return "setcc" - case cmove: - return "cmove" - case push64: - return "push64" - case pop64: - return "pop64" - case xmmMovRM: - return "xmmMovRM" - case xmmLoadConst: - return "xmmLoadConst" - case xmmToGpr: - return "xmmToGpr" - case cvtUint64ToFloatSeq: - return "cvtUint64ToFloatSeq" - case cvtFloatToSintSeq: - return "cvtFloatToSintSeq" - case cvtFloatToUintSeq: - return "cvtFloatToUintSeq" - case xmmMinMaxSeq: - return "xmmMinMaxSeq" - case xmmCmpRmR: - return "xmmCmpRmR" - case xmmRmRImm: - return "xmmRmRImm" - case jmpIf: - return "jmpIf" - case jmp: - return "jmp" - case jmpTableIsland: - return "jmpTableIsland" - case exitSequence: - return "exit_sequence" - case ud2: - return "ud2" - case xchg: - return "xchg" - case zeros: - return "zeros" - case fcvtToSintSequence: - return "fcvtToSintSequence" - case fcvtToUintSequence: - return "fcvtToUintSequence" - case xmmCMov: - return "xmmCMov" - case idivRemSequence: - return "idivRemSequence" - case mfence: - return "mfence" - case lockcmpxchg: - return "lockcmpxchg" - case lockxadd: - return "lockxadd" - default: - panic("BUG") - } -} - -type aluRmiROpcode byte - -const ( - aluRmiROpcodeAdd aluRmiROpcode = iota + 1 - aluRmiROpcodeSub - aluRmiROpcodeAnd - aluRmiROpcodeOr - aluRmiROpcodeXor - aluRmiROpcodeMul -) - -func (a aluRmiROpcode) String() string { - switch a { - case aluRmiROpcodeAdd: - return "add" - case aluRmiROpcodeSub: - return "sub" - case aluRmiROpcodeAnd: - return "and" - case aluRmiROpcodeOr: - return "or" - case aluRmiROpcodeXor: - return "xor" - case aluRmiROpcodeMul: - return "imul" - default: - panic("BUG") - } -} - -func (i *instruction) asJmpIf(cond cond, target operand) *instruction { - i.kind = jmpIf - i.u1 = uint64(cond) - i.op1 = target - return i -} - -// asJmpTableSequence is used to emit the jump table. -// targetSliceIndex is the index of the target slice in machine.jmpTableTargets. 
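// A minimal usage sketch; the bookkeeping around machine.jmpTableTargets and
// the m.insert helper is assumed here rather than taken from this file:
//
//	index := len(m.jmpTableTargets)
//	m.jmpTableTargets = append(m.jmpTableTargets, targets)
//	m.insert(m.allocateInstr().asJmpTableSequence(index, len(targets)))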
-func (i *instruction) asJmpTableSequence(targetSliceIndex int, targetCount int) *instruction { - i.kind = jmpTableIsland - i.u1 = uint64(targetSliceIndex) - i.u2 = uint64(targetCount) - return i -} - -func (i *instruction) asJmp(target operand) *instruction { - i.kind = jmp - i.op1 = target - return i -} - -func (i *instruction) jmpLabel() label { - switch i.kind { - case jmp, jmpIf, lea, xmmUnaryRmR: - return i.op1.label() - default: - panic("BUG") - } -} - -func (i *instruction) asLEA(target operand, rd regalloc.VReg) *instruction { - i.kind = lea - i.op1 = target - i.op2 = newOperandReg(rd) - return i -} - -func (i *instruction) asCall(ref ssa.FuncRef, abi *backend.FunctionABI) *instruction { - i.kind = call - i.u1 = uint64(ref) - if abi != nil { - i.u2 = abi.ABIInfoAsUint64() - } - return i -} - -func (i *instruction) asCallIndirect(ptr operand, abi *backend.FunctionABI) *instruction { - if ptr.kind != operandKindReg && ptr.kind != operandKindMem { - panic("BUG") - } - i.kind = callIndirect - i.op1 = ptr - if abi != nil { - i.u2 = abi.ABIInfoAsUint64() - } - return i -} - -func (i *instruction) asRet() *instruction { - i.kind = ret - return i -} - -func (i *instruction) asImm(dst regalloc.VReg, value uint64, _64 bool) *instruction { - i.kind = imm - i.op2 = newOperandReg(dst) - i.u1 = value - i.b1 = _64 - return i -} - -func (i *instruction) asAluRmiR(op aluRmiROpcode, rm operand, rd regalloc.VReg, _64 bool) *instruction { - if rm.kind != operandKindReg && rm.kind != operandKindMem && rm.kind != operandKindImm32 { - panic("BUG") - } - i.kind = aluRmiR - i.op1 = rm - i.op2 = newOperandReg(rd) - i.u1 = uint64(op) - i.b1 = _64 - return i -} - -func (i *instruction) asZeros(dst regalloc.VReg) *instruction { - i.kind = zeros - i.op2 = newOperandReg(dst) - return i -} - -func (i *instruction) asBlendvpd(rm operand, rd regalloc.VReg) *instruction { - if rm.kind != operandKindReg && rm.kind != operandKindMem { - panic("BUG") - } - i.kind = blendvpd - i.op1 = rm - i.op2 = newOperandReg(rd) - return i -} - -func (i *instruction) asXmmRmR(op sseOpcode, rm operand, rd regalloc.VReg) *instruction { - if rm.kind != operandKindReg && rm.kind != operandKindMem { - panic("BUG") - } - i.kind = xmmRmR - i.op1 = rm - i.op2 = newOperandReg(rd) - i.u1 = uint64(op) - return i -} - -func (i *instruction) asXmmRmRImm(op sseOpcode, imm uint8, rm operand, rd regalloc.VReg) *instruction { - if rm.kind != operandKindReg && rm.kind != operandKindMem { - panic("BUG") - } - i.kind = xmmRmRImm - i.op1 = rm - i.op2 = newOperandReg(rd) - i.u1 = uint64(op) - i.u2 = uint64(imm) - return i -} - -func (i *instruction) asGprToXmm(op sseOpcode, rm operand, rd regalloc.VReg, _64 bool) *instruction { - if rm.kind != operandKindReg && rm.kind != operandKindMem { - panic("BUG") - } - i.kind = gprToXmm - i.op1 = rm - i.op2 = newOperandReg(rd) - i.u1 = uint64(op) - i.b1 = _64 - return i -} - -func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction { - i.kind = sourceOffsetInfo - i.u1 = uint64(l) - return i -} - -func (i *instruction) sourceOffsetInfo() ssa.SourceOffset { - return ssa.SourceOffset(i.u1) -} - -func (i *instruction) asXmmToGpr(op sseOpcode, rm, rd regalloc.VReg, _64 bool) *instruction { - i.kind = xmmToGpr - i.op1 = newOperandReg(rm) - i.op2 = newOperandReg(rd) - i.u1 = uint64(op) - i.b1 = _64 - return i -} - -func (i *instruction) asMovRM(rm regalloc.VReg, rd operand, size byte) *instruction { - if rd.kind != operandKindMem { - panic("BUG") - } - i.kind = movRM - i.op1 = newOperandReg(rm) - 
i.op2 = rd - i.u1 = uint64(size) - return i -} - -func (i *instruction) asMovsxRmR(ext extMode, src operand, rd regalloc.VReg) *instruction { - if src.kind != operandKindReg && src.kind != operandKindMem { - panic("BUG") - } - i.kind = movsxRmR - i.op1 = src - i.op2 = newOperandReg(rd) - i.u1 = uint64(ext) - return i -} - -func (i *instruction) asMovzxRmR(ext extMode, src operand, rd regalloc.VReg) *instruction { - if src.kind != operandKindReg && src.kind != operandKindMem { - panic("BUG") - } - i.kind = movzxRmR - i.op1 = src - i.op2 = newOperandReg(rd) - i.u1 = uint64(ext) - return i -} - -func (i *instruction) asSignExtendData(_64 bool) *instruction { - i.kind = signExtendData - i.b1 = _64 - return i -} - -func (i *instruction) asUD2() *instruction { - i.kind = ud2 - return i -} - -func (i *instruction) asDiv(rn operand, signed bool, _64 bool) *instruction { - i.kind = div - i.op1 = rn - i.b1 = _64 - if signed { - i.u1 = 1 - } - return i -} - -func (i *instruction) asMov64MR(rm operand, rd regalloc.VReg) *instruction { - if rm.kind != operandKindMem { - panic("BUG") - } - i.kind = mov64MR - i.op1 = rm - i.op2 = newOperandReg(rd) - return i -} - -func (i *instruction) asMovRR(rm, rd regalloc.VReg, _64 bool) *instruction { - i.kind = movRR - i.op1 = newOperandReg(rm) - i.op2 = newOperandReg(rd) - i.b1 = _64 - return i -} - -func (i *instruction) asNot(rm operand, _64 bool) *instruction { - if rm.kind != operandKindReg && rm.kind != operandKindMem { - panic("BUG") - } - i.kind = not - i.op1 = rm - i.b1 = _64 - return i -} - -func (i *instruction) asNeg(rm operand, _64 bool) *instruction { - if rm.kind != operandKindReg && rm.kind != operandKindMem { - panic("BUG") - } - i.kind = neg - i.op1 = rm - i.b1 = _64 - return i -} - -func (i *instruction) asMulHi(rm operand, signed, _64 bool) *instruction { - if rm.kind != operandKindReg && (rm.kind != operandKindMem) { - panic("BUG") - } - i.kind = mulHi - i.op1 = rm - i.b1 = _64 - if signed { - i.u1 = 1 - } - return i -} - -func (i *instruction) asUnaryRmR(op unaryRmROpcode, rm operand, rd regalloc.VReg, _64 bool) *instruction { - if rm.kind != operandKindReg && rm.kind != operandKindMem { - panic("BUG") - } - i.kind = unaryRmR - i.op1 = rm - i.op2 = newOperandReg(rd) - i.u1 = uint64(op) - i.b1 = _64 - return i -} - -func (i *instruction) asShiftR(op shiftROp, amount operand, rd regalloc.VReg, _64 bool) *instruction { - if amount.kind != operandKindReg && amount.kind != operandKindImm32 { - panic("BUG") - } - i.kind = shiftR - i.op1 = amount - i.op2 = newOperandReg(rd) - i.u1 = uint64(op) - i.b1 = _64 - return i -} - -func (i *instruction) asXmmRmiReg(op sseOpcode, rm operand, rd regalloc.VReg) *instruction { - if rm.kind != operandKindReg && rm.kind != operandKindImm32 && rm.kind != operandKindMem { - panic("BUG") - } - i.kind = xmmRmiReg - i.op1 = rm - i.op2 = newOperandReg(rd) - i.u1 = uint64(op) - return i -} - -func (i *instruction) asCmpRmiR(cmp bool, rm operand, rn regalloc.VReg, _64 bool) *instruction { - if rm.kind != operandKindReg && rm.kind != operandKindImm32 && rm.kind != operandKindMem { - panic("BUG") - } - i.kind = cmpRmiR - i.op1 = rm - i.op2 = newOperandReg(rn) - if cmp { - i.u1 = 1 - } - i.b1 = _64 - return i -} - -func (i *instruction) asSetcc(c cond, rd regalloc.VReg) *instruction { - i.kind = setcc - i.op2 = newOperandReg(rd) - i.u1 = uint64(c) - return i -} - -func (i *instruction) asCmove(c cond, rm operand, rd regalloc.VReg, _64 bool) *instruction { - i.kind = cmove - i.op1 = rm - i.op2 = newOperandReg(rd) - i.u1 = 
uint64(c) - i.b1 = _64 - return i -} - -func (m *machine) allocateExitSeq(execCtx regalloc.VReg) *instruction { - i := m.allocateInstr() - i.kind = exitSequence - i.op1 = newOperandReg(execCtx) - // Allocate the address mode that will be used in encoding the exit sequence. - i.op2 = newOperandMem(m.amodePool.Allocate()) - return i -} - -func (i *instruction) asXmmUnaryRmR(op sseOpcode, rm operand, rd regalloc.VReg) *instruction { - if rm.kind != operandKindReg && rm.kind != operandKindMem { - panic("BUG") - } - i.kind = xmmUnaryRmR - i.op1 = rm - i.op2 = newOperandReg(rd) - i.u1 = uint64(op) - return i -} - -func (i *instruction) asXmmUnaryRmRImm(op sseOpcode, imm byte, rm operand, rd regalloc.VReg) *instruction { - if rm.kind != operandKindReg && rm.kind != operandKindMem { - panic("BUG") - } - i.kind = xmmUnaryRmRImm - i.op1 = rm - i.op2 = newOperandReg(rd) - i.u1 = uint64(op) - i.u2 = uint64(imm) - return i -} - -func (i *instruction) asXmmCmpRmR(op sseOpcode, rm operand, rd regalloc.VReg) *instruction { - if rm.kind != operandKindReg && rm.kind != operandKindMem { - panic("BUG") - } - i.kind = xmmCmpRmR - i.op1 = rm - i.op2 = newOperandReg(rd) - i.u1 = uint64(op) - return i -} - -func (i *instruction) asXmmMovRM(op sseOpcode, rm regalloc.VReg, rd operand) *instruction { - if rd.kind != operandKindMem { - panic("BUG") - } - i.kind = xmmMovRM - i.op1 = newOperandReg(rm) - i.op2 = rd - i.u1 = uint64(op) - return i -} - -func (i *instruction) asPop64(rm regalloc.VReg) *instruction { - i.kind = pop64 - i.op1 = newOperandReg(rm) - return i -} - -func (i *instruction) asPush64(op operand) *instruction { - if op.kind != operandKindReg && op.kind != operandKindMem && op.kind != operandKindImm32 { - panic("BUG") - } - i.kind = push64 - i.op1 = op - return i -} - -func (i *instruction) asXCHG(rm regalloc.VReg, rd operand, size byte) *instruction { - i.kind = xchg - i.op1 = newOperandReg(rm) - i.op2 = rd - i.u1 = uint64(size) - return i -} - -func (i *instruction) asLockCmpXCHG(rm regalloc.VReg, rd *amode, size byte) *instruction { - i.kind = lockcmpxchg - i.op1 = newOperandReg(rm) - i.op2 = newOperandMem(rd) - i.u1 = uint64(size) - return i -} - -func (i *instruction) asLockXAdd(rm regalloc.VReg, rd *amode, size byte) *instruction { - i.kind = lockxadd - i.op1 = newOperandReg(rm) - i.op2 = newOperandMem(rd) - i.u1 = uint64(size) - return i -} - -type unaryRmROpcode byte - -const ( - unaryRmROpcodeBsr unaryRmROpcode = iota - unaryRmROpcodeBsf - unaryRmROpcodeLzcnt - unaryRmROpcodeTzcnt - unaryRmROpcodePopcnt -) - -func (u unaryRmROpcode) String() string { - switch u { - case unaryRmROpcodeBsr: - return "bsr" - case unaryRmROpcodeBsf: - return "bsf" - case unaryRmROpcodeLzcnt: - return "lzcnt" - case unaryRmROpcodeTzcnt: - return "tzcnt" - case unaryRmROpcodePopcnt: - return "popcnt" - default: - panic("BUG") - } -} - -type shiftROp byte - -const ( - shiftROpRotateLeft shiftROp = 0 - shiftROpRotateRight shiftROp = 1 - shiftROpShiftLeft shiftROp = 4 - shiftROpShiftRightLogical shiftROp = 5 - shiftROpShiftRightArithmetic shiftROp = 7 -) - -func (s shiftROp) String() string { - switch s { - case shiftROpRotateLeft: - return "rol" - case shiftROpRotateRight: - return "ror" - case shiftROpShiftLeft: - return "shl" - case shiftROpShiftRightLogical: - return "shr" - case shiftROpShiftRightArithmetic: - return "sar" - default: - panic("BUG") - } -} - -type sseOpcode byte - -const ( - sseOpcodeInvalid sseOpcode = iota - sseOpcodeAddps - sseOpcodeAddpd - sseOpcodeAddss - sseOpcodeAddsd - sseOpcodeAndps - 
sseOpcodeAndpd - sseOpcodeAndnps - sseOpcodeAndnpd - sseOpcodeBlendvps - sseOpcodeBlendvpd - sseOpcodeComiss - sseOpcodeComisd - sseOpcodeCmpps - sseOpcodeCmppd - sseOpcodeCmpss - sseOpcodeCmpsd - sseOpcodeCvtdq2ps - sseOpcodeCvtdq2pd - sseOpcodeCvtsd2ss - sseOpcodeCvtsd2si - sseOpcodeCvtsi2ss - sseOpcodeCvtsi2sd - sseOpcodeCvtss2si - sseOpcodeCvtss2sd - sseOpcodeCvttps2dq - sseOpcodeCvttss2si - sseOpcodeCvttsd2si - sseOpcodeDivps - sseOpcodeDivpd - sseOpcodeDivss - sseOpcodeDivsd - sseOpcodeInsertps - sseOpcodeMaxps - sseOpcodeMaxpd - sseOpcodeMaxss - sseOpcodeMaxsd - sseOpcodeMinps - sseOpcodeMinpd - sseOpcodeMinss - sseOpcodeMinsd - sseOpcodeMovaps - sseOpcodeMovapd - sseOpcodeMovd - sseOpcodeMovdqa - sseOpcodeMovdqu - sseOpcodeMovlhps - sseOpcodeMovmskps - sseOpcodeMovmskpd - sseOpcodeMovq - sseOpcodeMovss - sseOpcodeMovsd - sseOpcodeMovups - sseOpcodeMovupd - sseOpcodeMulps - sseOpcodeMulpd - sseOpcodeMulss - sseOpcodeMulsd - sseOpcodeOrps - sseOpcodeOrpd - sseOpcodePabsb - sseOpcodePabsw - sseOpcodePabsd - sseOpcodePackssdw - sseOpcodePacksswb - sseOpcodePackusdw - sseOpcodePackuswb - sseOpcodePaddb - sseOpcodePaddd - sseOpcodePaddq - sseOpcodePaddw - sseOpcodePaddsb - sseOpcodePaddsw - sseOpcodePaddusb - sseOpcodePaddusw - sseOpcodePalignr - sseOpcodePand - sseOpcodePandn - sseOpcodePavgb - sseOpcodePavgw - sseOpcodePcmpeqb - sseOpcodePcmpeqw - sseOpcodePcmpeqd - sseOpcodePcmpeqq - sseOpcodePcmpgtb - sseOpcodePcmpgtw - sseOpcodePcmpgtd - sseOpcodePcmpgtq - sseOpcodePextrb - sseOpcodePextrw - sseOpcodePextrd - sseOpcodePextrq - sseOpcodePinsrb - sseOpcodePinsrw - sseOpcodePinsrd - sseOpcodePinsrq - sseOpcodePmaddwd - sseOpcodePmaxsb - sseOpcodePmaxsw - sseOpcodePmaxsd - sseOpcodePmaxub - sseOpcodePmaxuw - sseOpcodePmaxud - sseOpcodePminsb - sseOpcodePminsw - sseOpcodePminsd - sseOpcodePminub - sseOpcodePminuw - sseOpcodePminud - sseOpcodePmovmskb - sseOpcodePmovsxbd - sseOpcodePmovsxbw - sseOpcodePmovsxbq - sseOpcodePmovsxwd - sseOpcodePmovsxwq - sseOpcodePmovsxdq - sseOpcodePmovzxbd - sseOpcodePmovzxbw - sseOpcodePmovzxbq - sseOpcodePmovzxwd - sseOpcodePmovzxwq - sseOpcodePmovzxdq - sseOpcodePmulld - sseOpcodePmullw - sseOpcodePmuludq - sseOpcodePor - sseOpcodePshufb - sseOpcodePshufd - sseOpcodePsllw - sseOpcodePslld - sseOpcodePsllq - sseOpcodePsraw - sseOpcodePsrad - sseOpcodePsrlw - sseOpcodePsrld - sseOpcodePsrlq - sseOpcodePsubb - sseOpcodePsubd - sseOpcodePsubq - sseOpcodePsubw - sseOpcodePsubsb - sseOpcodePsubsw - sseOpcodePsubusb - sseOpcodePsubusw - sseOpcodePtest - sseOpcodePunpckhbw - sseOpcodePunpcklbw - sseOpcodePxor - sseOpcodeRcpss - sseOpcodeRoundps - sseOpcodeRoundpd - sseOpcodeRoundss - sseOpcodeRoundsd - sseOpcodeRsqrtss - sseOpcodeSqrtps - sseOpcodeSqrtpd - sseOpcodeSqrtss - sseOpcodeSqrtsd - sseOpcodeSubps - sseOpcodeSubpd - sseOpcodeSubss - sseOpcodeSubsd - sseOpcodeUcomiss - sseOpcodeUcomisd - sseOpcodeXorps - sseOpcodeXorpd - sseOpcodePmulhrsw - sseOpcodeUnpcklps - sseOpcodeCvtps2pd - sseOpcodeCvtpd2ps - sseOpcodeCvttpd2dq - sseOpcodeShufps - sseOpcodePmaddubsw -) - -func (s sseOpcode) String() string { - switch s { - case sseOpcodeInvalid: - return "invalid" - case sseOpcodeAddps: - return "addps" - case sseOpcodeAddpd: - return "addpd" - case sseOpcodeAddss: - return "addss" - case sseOpcodeAddsd: - return "addsd" - case sseOpcodeAndps: - return "andps" - case sseOpcodeAndpd: - return "andpd" - case sseOpcodeAndnps: - return "andnps" - case sseOpcodeAndnpd: - return "andnpd" - case sseOpcodeBlendvps: - return "blendvps" - case sseOpcodeBlendvpd: - return 
"blendvpd" - case sseOpcodeComiss: - return "comiss" - case sseOpcodeComisd: - return "comisd" - case sseOpcodeCmpps: - return "cmpps" - case sseOpcodeCmppd: - return "cmppd" - case sseOpcodeCmpss: - return "cmpss" - case sseOpcodeCmpsd: - return "cmpsd" - case sseOpcodeCvtdq2ps: - return "cvtdq2ps" - case sseOpcodeCvtdq2pd: - return "cvtdq2pd" - case sseOpcodeCvtsd2ss: - return "cvtsd2ss" - case sseOpcodeCvtsd2si: - return "cvtsd2si" - case sseOpcodeCvtsi2ss: - return "cvtsi2ss" - case sseOpcodeCvtsi2sd: - return "cvtsi2sd" - case sseOpcodeCvtss2si: - return "cvtss2si" - case sseOpcodeCvtss2sd: - return "cvtss2sd" - case sseOpcodeCvttps2dq: - return "cvttps2dq" - case sseOpcodeCvttss2si: - return "cvttss2si" - case sseOpcodeCvttsd2si: - return "cvttsd2si" - case sseOpcodeDivps: - return "divps" - case sseOpcodeDivpd: - return "divpd" - case sseOpcodeDivss: - return "divss" - case sseOpcodeDivsd: - return "divsd" - case sseOpcodeInsertps: - return "insertps" - case sseOpcodeMaxps: - return "maxps" - case sseOpcodeMaxpd: - return "maxpd" - case sseOpcodeMaxss: - return "maxss" - case sseOpcodeMaxsd: - return "maxsd" - case sseOpcodeMinps: - return "minps" - case sseOpcodeMinpd: - return "minpd" - case sseOpcodeMinss: - return "minss" - case sseOpcodeMinsd: - return "minsd" - case sseOpcodeMovaps: - return "movaps" - case sseOpcodeMovapd: - return "movapd" - case sseOpcodeMovd: - return "movd" - case sseOpcodeMovdqa: - return "movdqa" - case sseOpcodeMovdqu: - return "movdqu" - case sseOpcodeMovlhps: - return "movlhps" - case sseOpcodeMovmskps: - return "movmskps" - case sseOpcodeMovmskpd: - return "movmskpd" - case sseOpcodeMovq: - return "movq" - case sseOpcodeMovss: - return "movss" - case sseOpcodeMovsd: - return "movsd" - case sseOpcodeMovups: - return "movups" - case sseOpcodeMovupd: - return "movupd" - case sseOpcodeMulps: - return "mulps" - case sseOpcodeMulpd: - return "mulpd" - case sseOpcodeMulss: - return "mulss" - case sseOpcodeMulsd: - return "mulsd" - case sseOpcodeOrps: - return "orps" - case sseOpcodeOrpd: - return "orpd" - case sseOpcodePabsb: - return "pabsb" - case sseOpcodePabsw: - return "pabsw" - case sseOpcodePabsd: - return "pabsd" - case sseOpcodePackssdw: - return "packssdw" - case sseOpcodePacksswb: - return "packsswb" - case sseOpcodePackusdw: - return "packusdw" - case sseOpcodePackuswb: - return "packuswb" - case sseOpcodePaddb: - return "paddb" - case sseOpcodePaddd: - return "paddd" - case sseOpcodePaddq: - return "paddq" - case sseOpcodePaddw: - return "paddw" - case sseOpcodePaddsb: - return "paddsb" - case sseOpcodePaddsw: - return "paddsw" - case sseOpcodePaddusb: - return "paddusb" - case sseOpcodePaddusw: - return "paddusw" - case sseOpcodePalignr: - return "palignr" - case sseOpcodePand: - return "pand" - case sseOpcodePandn: - return "pandn" - case sseOpcodePavgb: - return "pavgb" - case sseOpcodePavgw: - return "pavgw" - case sseOpcodePcmpeqb: - return "pcmpeqb" - case sseOpcodePcmpeqw: - return "pcmpeqw" - case sseOpcodePcmpeqd: - return "pcmpeqd" - case sseOpcodePcmpeqq: - return "pcmpeqq" - case sseOpcodePcmpgtb: - return "pcmpgtb" - case sseOpcodePcmpgtw: - return "pcmpgtw" - case sseOpcodePcmpgtd: - return "pcmpgtd" - case sseOpcodePcmpgtq: - return "pcmpgtq" - case sseOpcodePextrb: - return "pextrb" - case sseOpcodePextrw: - return "pextrw" - case sseOpcodePextrd: - return "pextrd" - case sseOpcodePextrq: - return "pextrq" - case sseOpcodePinsrb: - return "pinsrb" - case sseOpcodePinsrw: - return "pinsrw" - case sseOpcodePinsrd: - return 
"pinsrd" - case sseOpcodePinsrq: - return "pinsrq" - case sseOpcodePmaddwd: - return "pmaddwd" - case sseOpcodePmaxsb: - return "pmaxsb" - case sseOpcodePmaxsw: - return "pmaxsw" - case sseOpcodePmaxsd: - return "pmaxsd" - case sseOpcodePmaxub: - return "pmaxub" - case sseOpcodePmaxuw: - return "pmaxuw" - case sseOpcodePmaxud: - return "pmaxud" - case sseOpcodePminsb: - return "pminsb" - case sseOpcodePminsw: - return "pminsw" - case sseOpcodePminsd: - return "pminsd" - case sseOpcodePminub: - return "pminub" - case sseOpcodePminuw: - return "pminuw" - case sseOpcodePminud: - return "pminud" - case sseOpcodePmovmskb: - return "pmovmskb" - case sseOpcodePmovsxbd: - return "pmovsxbd" - case sseOpcodePmovsxbw: - return "pmovsxbw" - case sseOpcodePmovsxbq: - return "pmovsxbq" - case sseOpcodePmovsxwd: - return "pmovsxwd" - case sseOpcodePmovsxwq: - return "pmovsxwq" - case sseOpcodePmovsxdq: - return "pmovsxdq" - case sseOpcodePmovzxbd: - return "pmovzxbd" - case sseOpcodePmovzxbw: - return "pmovzxbw" - case sseOpcodePmovzxbq: - return "pmovzxbq" - case sseOpcodePmovzxwd: - return "pmovzxwd" - case sseOpcodePmovzxwq: - return "pmovzxwq" - case sseOpcodePmovzxdq: - return "pmovzxdq" - case sseOpcodePmulld: - return "pmulld" - case sseOpcodePmullw: - return "pmullw" - case sseOpcodePmuludq: - return "pmuludq" - case sseOpcodePor: - return "por" - case sseOpcodePshufb: - return "pshufb" - case sseOpcodePshufd: - return "pshufd" - case sseOpcodePsllw: - return "psllw" - case sseOpcodePslld: - return "pslld" - case sseOpcodePsllq: - return "psllq" - case sseOpcodePsraw: - return "psraw" - case sseOpcodePsrad: - return "psrad" - case sseOpcodePsrlw: - return "psrlw" - case sseOpcodePsrld: - return "psrld" - case sseOpcodePsrlq: - return "psrlq" - case sseOpcodePsubb: - return "psubb" - case sseOpcodePsubd: - return "psubd" - case sseOpcodePsubq: - return "psubq" - case sseOpcodePsubw: - return "psubw" - case sseOpcodePsubsb: - return "psubsb" - case sseOpcodePsubsw: - return "psubsw" - case sseOpcodePsubusb: - return "psubusb" - case sseOpcodePsubusw: - return "psubusw" - case sseOpcodePtest: - return "ptest" - case sseOpcodePunpckhbw: - return "punpckhbw" - case sseOpcodePunpcklbw: - return "punpcklbw" - case sseOpcodePxor: - return "pxor" - case sseOpcodeRcpss: - return "rcpss" - case sseOpcodeRoundps: - return "roundps" - case sseOpcodeRoundpd: - return "roundpd" - case sseOpcodeRoundss: - return "roundss" - case sseOpcodeRoundsd: - return "roundsd" - case sseOpcodeRsqrtss: - return "rsqrtss" - case sseOpcodeSqrtps: - return "sqrtps" - case sseOpcodeSqrtpd: - return "sqrtpd" - case sseOpcodeSqrtss: - return "sqrtss" - case sseOpcodeSqrtsd: - return "sqrtsd" - case sseOpcodeSubps: - return "subps" - case sseOpcodeSubpd: - return "subpd" - case sseOpcodeSubss: - return "subss" - case sseOpcodeSubsd: - return "subsd" - case sseOpcodeUcomiss: - return "ucomiss" - case sseOpcodeUcomisd: - return "ucomisd" - case sseOpcodeXorps: - return "xorps" - case sseOpcodeXorpd: - return "xorpd" - case sseOpcodePmulhrsw: - return "pmulhrsw" - case sseOpcodeUnpcklps: - return "unpcklps" - case sseOpcodeCvtps2pd: - return "cvtps2pd" - case sseOpcodeCvtpd2ps: - return "cvtpd2ps" - case sseOpcodeCvttpd2dq: - return "cvttpd2dq" - case sseOpcodeShufps: - return "shufps" - case sseOpcodePmaddubsw: - return "pmaddubsw" - default: - panic("BUG") - } -} - -type roundingMode uint8 - -const ( - roundingModeNearest roundingMode = iota - roundingModeDown - roundingModeUp - roundingModeZero -) - -func (r roundingMode) String() 
string { - switch r { - case roundingModeNearest: - return "nearest" - case roundingModeDown: - return "down" - case roundingModeUp: - return "up" - case roundingModeZero: - return "zero" - default: - panic("BUG") - } -} - -// cmpPred is the immediate value for a comparison operation in xmmRmRImm. -type cmpPred uint8 - -const ( - // cmpPredEQ_OQ is Equal (ordered, non-signaling) - cmpPredEQ_OQ cmpPred = iota - // cmpPredLT_OS is Less-than (ordered, signaling) - cmpPredLT_OS - // cmpPredLE_OS is Less-than-or-equal (ordered, signaling) - cmpPredLE_OS - // cmpPredUNORD_Q is Unordered (non-signaling) - cmpPredUNORD_Q - // cmpPredNEQ_UQ is Not-equal (unordered, non-signaling) - cmpPredNEQ_UQ - // cmpPredNLT_US is Not-less-than (unordered, signaling) - cmpPredNLT_US - // cmpPredNLE_US is Not-less-than-or-equal (unordered, signaling) - cmpPredNLE_US - // cmpPredORD_Q is Ordered (non-signaling) - cmpPredORD_Q - // cmpPredEQ_UQ is Equal (unordered, non-signaling) - cmpPredEQ_UQ - // cmpPredNGE_US is Not-greater-than-or-equal (unordered, signaling) - cmpPredNGE_US - // cmpPredNGT_US is Not-greater-than (unordered, signaling) - cmpPredNGT_US - // cmpPredFALSE_OQ is False (ordered, non-signaling) - cmpPredFALSE_OQ - // cmpPredNEQ_OQ is Not-equal (ordered, non-signaling) - cmpPredNEQ_OQ - // cmpPredGE_OS is Greater-than-or-equal (ordered, signaling) - cmpPredGE_OS - // cmpPredGT_OS is Greater-than (ordered, signaling) - cmpPredGT_OS - // cmpPredTRUE_UQ is True (unordered, non-signaling) - cmpPredTRUE_UQ - // Equal (ordered, signaling) - cmpPredEQ_OS - // Less-than (ordered, nonsignaling) - cmpPredLT_OQ - // Less-than-or-equal (ordered, nonsignaling) - cmpPredLE_OQ - // Unordered (signaling) - cmpPredUNORD_S - // Not-equal (unordered, signaling) - cmpPredNEQ_US - // Not-less-than (unordered, nonsignaling) - cmpPredNLT_UQ - // Not-less-than-or-equal (unordered, nonsignaling) - cmpPredNLE_UQ - // Ordered (signaling) - cmpPredORD_S - // Equal (unordered, signaling) - cmpPredEQ_US - // Not-greater-than-or-equal (unordered, non-signaling) - cmpPredNGE_UQ - // Not-greater-than (unordered, nonsignaling) - cmpPredNGT_UQ - // False (ordered, signaling) - cmpPredFALSE_OS - // Not-equal (ordered, signaling) - cmpPredNEQ_OS - // Greater-than-or-equal (ordered, nonsignaling) - cmpPredGE_OQ - // Greater-than (ordered, nonsignaling) - cmpPredGT_OQ - // True (unordered, signaling) - cmpPredTRUE_US -) - -func (r cmpPred) String() string { - switch r { - case cmpPredEQ_OQ: - return "eq_oq" - case cmpPredLT_OS: - return "lt_os" - case cmpPredLE_OS: - return "le_os" - case cmpPredUNORD_Q: - return "unord_q" - case cmpPredNEQ_UQ: - return "neq_uq" - case cmpPredNLT_US: - return "nlt_us" - case cmpPredNLE_US: - return "nle_us" - case cmpPredORD_Q: - return "ord_q" - case cmpPredEQ_UQ: - return "eq_uq" - case cmpPredNGE_US: - return "nge_us" - case cmpPredNGT_US: - return "ngt_us" - case cmpPredFALSE_OQ: - return "false_oq" - case cmpPredNEQ_OQ: - return "neq_oq" - case cmpPredGE_OS: - return "ge_os" - case cmpPredGT_OS: - return "gt_os" - case cmpPredTRUE_UQ: - return "true_uq" - case cmpPredEQ_OS: - return "eq_os" - case cmpPredLT_OQ: - return "lt_oq" - case cmpPredLE_OQ: - return "le_oq" - case cmpPredUNORD_S: - return "unord_s" - case cmpPredNEQ_US: - return "neq_us" - case cmpPredNLT_UQ: - return "nlt_uq" - case cmpPredNLE_UQ: - return "nle_uq" - case cmpPredORD_S: - return "ord_s" - case cmpPredEQ_US: - return "eq_us" - case cmpPredNGE_UQ: - return "nge_uq" - case cmpPredNGT_UQ: - return "ngt_uq" - case cmpPredFALSE_OS: 
- return "false_os" - case cmpPredNEQ_OS: - return "neq_os" - case cmpPredGE_OQ: - return "ge_oq" - case cmpPredGT_OQ: - return "gt_oq" - case cmpPredTRUE_US: - return "true_us" - default: - panic("BUG") - } -} - -func linkInstr(prev, next *instruction) *instruction { - prev.next = next - next.prev = prev - return next -} - -type defKind byte - -const ( - defKindNone defKind = iota + 1 - defKindOp2 - defKindCall - defKindDivRem -) - -var defKinds = [instrMax]defKind{ - nop0: defKindNone, - ret: defKindNone, - movRR: defKindOp2, - movRM: defKindNone, - xmmMovRM: defKindNone, - aluRmiR: defKindNone, - shiftR: defKindNone, - imm: defKindOp2, - unaryRmR: defKindOp2, - xmmRmiReg: defKindNone, - xmmUnaryRmR: defKindOp2, - xmmUnaryRmRImm: defKindOp2, - xmmCmpRmR: defKindNone, - xmmRmR: defKindNone, - xmmRmRImm: defKindNone, - mov64MR: defKindOp2, - movsxRmR: defKindOp2, - movzxRmR: defKindOp2, - gprToXmm: defKindOp2, - xmmToGpr: defKindOp2, - cmove: defKindNone, - call: defKindCall, - callIndirect: defKindCall, - ud2: defKindNone, - jmp: defKindNone, - jmpIf: defKindNone, - jmpTableIsland: defKindNone, - cmpRmiR: defKindNone, - exitSequence: defKindNone, - lea: defKindOp2, - setcc: defKindOp2, - zeros: defKindOp2, - sourceOffsetInfo: defKindNone, - fcvtToSintSequence: defKindNone, - defineUninitializedReg: defKindOp2, - fcvtToUintSequence: defKindNone, - xmmCMov: defKindOp2, - idivRemSequence: defKindDivRem, - blendvpd: defKindNone, - mfence: defKindNone, - xchg: defKindNone, - lockcmpxchg: defKindNone, - lockxadd: defKindNone, - neg: defKindNone, - nopUseReg: defKindNone, -} - -// String implements fmt.Stringer. -func (d defKind) String() string { - switch d { - case defKindNone: - return "none" - case defKindOp2: - return "op2" - case defKindCall: - return "call" - case defKindDivRem: - return "divrem" - default: - return "invalid" - } -} - -type useKind byte - -const ( - useKindNone useKind = iota + 1 - useKindOp1 - // useKindOp1Op2Reg is Op1 can be any operand, Op2 must be a register. - useKindOp1Op2Reg - // useKindOp1RegOp2 is Op1 must be a register, Op2 can be any operand. - useKindOp1RegOp2 - // useKindRaxOp1RegOp2 is Op1 must be a register, Op2 can be any operand, and RAX is used. 
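// In the useKinds table below, lockcmpxchg is the only kind mapped to this:
// cmpxchg implicitly compares against RAX, so RAX must be recorded as used in
// addition to the op1 register and the registers inside op2's address mode.
// Sketch (newValue is a hypothetical VReg, mem a *amode):
//
//	i := m.allocateInstr().asLockCmpXCHG(newValue, mem, 8) // lock cmpxchgq newValue, (mem)
//	// uses: newValue (op1), mem.base/mem.index (op2), plus RAX; the original
//	// memory value always ends up in RAX afterwards.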
- useKindRaxOp1RegOp2 - useKindDivRem - useKindBlendvpd - useKindCall - useKindCallInd - useKindFcvtToSintSequence - useKindFcvtToUintSequence -) - -var useKinds = [instrMax]useKind{ - nop0: useKindNone, - ret: useKindNone, - movRR: useKindOp1, - movRM: useKindOp1RegOp2, - xmmMovRM: useKindOp1RegOp2, - cmove: useKindOp1Op2Reg, - aluRmiR: useKindOp1Op2Reg, - shiftR: useKindOp1Op2Reg, - imm: useKindNone, - unaryRmR: useKindOp1, - xmmRmiReg: useKindOp1Op2Reg, - xmmUnaryRmR: useKindOp1, - xmmUnaryRmRImm: useKindOp1, - xmmCmpRmR: useKindOp1Op2Reg, - xmmRmR: useKindOp1Op2Reg, - xmmRmRImm: useKindOp1Op2Reg, - mov64MR: useKindOp1, - movzxRmR: useKindOp1, - movsxRmR: useKindOp1, - gprToXmm: useKindOp1, - xmmToGpr: useKindOp1, - call: useKindCall, - callIndirect: useKindCallInd, - ud2: useKindNone, - jmpIf: useKindOp1, - jmp: useKindOp1, - cmpRmiR: useKindOp1Op2Reg, - exitSequence: useKindOp1, - lea: useKindOp1, - jmpTableIsland: useKindNone, - setcc: useKindNone, - zeros: useKindNone, - sourceOffsetInfo: useKindNone, - fcvtToSintSequence: useKindFcvtToSintSequence, - defineUninitializedReg: useKindNone, - fcvtToUintSequence: useKindFcvtToUintSequence, - xmmCMov: useKindOp1, - idivRemSequence: useKindDivRem, - blendvpd: useKindBlendvpd, - mfence: useKindNone, - xchg: useKindOp1RegOp2, - lockcmpxchg: useKindRaxOp1RegOp2, - lockxadd: useKindOp1RegOp2, - neg: useKindOp1, - nopUseReg: useKindOp1, -} - -func (u useKind) String() string { - switch u { - case useKindNone: - return "none" - case useKindOp1: - return "op1" - case useKindOp1Op2Reg: - return "op1op2Reg" - case useKindOp1RegOp2: - return "op1RegOp2" - case useKindCall: - return "call" - case useKindCallInd: - return "callInd" - default: - return "invalid" - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go deleted file mode 100644 index 6637b428c..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go +++ /dev/null @@ -1,1683 +0,0 @@ -package amd64 - -import ( - "fmt" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -func (i *instruction) encode(c backend.Compiler) (needsLabelResolution bool) { - switch kind := i.kind; kind { - case nop0, sourceOffsetInfo, defineUninitializedReg, fcvtToSintSequence, fcvtToUintSequence, nopUseReg: - case ret: - encodeRet(c) - case imm: - dst := regEncodings[i.op2.reg().RealReg()] - con := i.u1 - if i.b1 { // 64 bit. - if lower32willSignExtendTo64(con) { - // Sign extend mov(imm32). 
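// The sign-extended form saves three bytes over movabsq, e.g.
//	movq $-1, %r11              -> 49 c7 c3 ff ff ff ff           (REX.W+B, C7 /0, imm32)
// versus the full-width form taken in the else branch:
//	movabsq $0x1122334455, %r11 -> 49 bb 55 44 33 22 11 00 00 00  (REX.W+B, B8+rd, imm64)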
- encodeRegReg(c, - legacyPrefixesNone, - 0xc7, 1, - 0, - dst, - rexInfo(0).setW(), - ) - c.Emit4Bytes(uint32(con)) - } else { - c.EmitByte(rexEncodingW | dst.rexBit()) - c.EmitByte(0xb8 | dst.encoding()) - c.Emit8Bytes(con) - } - } else { - if dst.rexBit() > 0 { - c.EmitByte(rexEncodingDefault | 0x1) - } - c.EmitByte(0xb8 | dst.encoding()) - c.Emit4Bytes(uint32(con)) - } - - case aluRmiR: - var rex rexInfo - if i.b1 { - rex = rex.setW() - } else { - rex = rex.clearW() - } - - dst := regEncodings[i.op2.reg().RealReg()] - - aluOp := aluRmiROpcode(i.u1) - if aluOp == aluRmiROpcodeMul { - op1 := i.op1 - const regMemOpc, regMemOpcNum = 0x0FAF, 2 - switch op1.kind { - case operandKindReg: - src := regEncodings[op1.reg().RealReg()] - encodeRegReg(c, legacyPrefixesNone, regMemOpc, regMemOpcNum, dst, src, rex) - case operandKindMem: - m := i.op1.addressMode() - encodeRegMem(c, legacyPrefixesNone, regMemOpc, regMemOpcNum, dst, m, rex) - case operandKindImm32: - imm8 := lower8willSignExtendTo32(op1.imm32()) - var opc uint32 - if imm8 { - opc = 0x6b - } else { - opc = 0x69 - } - encodeRegReg(c, legacyPrefixesNone, opc, 1, dst, dst, rex) - if imm8 { - c.EmitByte(byte(op1.imm32())) - } else { - c.Emit4Bytes(op1.imm32()) - } - default: - panic("BUG: invalid operand kind") - } - } else { - const opcodeNum = 1 - var opcR, opcM, subOpcImm uint32 - switch aluOp { - case aluRmiROpcodeAdd: - opcR, opcM, subOpcImm = 0x01, 0x03, 0x0 - case aluRmiROpcodeSub: - opcR, opcM, subOpcImm = 0x29, 0x2b, 0x5 - case aluRmiROpcodeAnd: - opcR, opcM, subOpcImm = 0x21, 0x23, 0x4 - case aluRmiROpcodeOr: - opcR, opcM, subOpcImm = 0x09, 0x0b, 0x1 - case aluRmiROpcodeXor: - opcR, opcM, subOpcImm = 0x31, 0x33, 0x6 - default: - panic("BUG: invalid aluRmiROpcode") - } - - op1 := i.op1 - switch op1.kind { - case operandKindReg: - src := regEncodings[op1.reg().RealReg()] - encodeRegReg(c, legacyPrefixesNone, opcR, opcodeNum, src, dst, rex) - case operandKindMem: - m := i.op1.addressMode() - encodeRegMem(c, legacyPrefixesNone, opcM, opcodeNum, dst, m, rex) - case operandKindImm32: - imm8 := lower8willSignExtendTo32(op1.imm32()) - var opc uint32 - if imm8 { - opc = 0x83 - } else { - opc = 0x81 - } - encodeRegReg(c, legacyPrefixesNone, opc, opcodeNum, regEnc(subOpcImm), dst, rex) - if imm8 { - c.EmitByte(byte(op1.imm32())) - } else { - c.Emit4Bytes(op1.imm32()) - } - default: - panic("BUG: invalid operand kind") - } - } - - case movRR: - src := regEncodings[i.op1.reg().RealReg()] - dst := regEncodings[i.op2.reg().RealReg()] - var rex rexInfo - if i.b1 { - rex = rex.setW() - } else { - rex = rex.clearW() - } - encodeRegReg(c, legacyPrefixesNone, 0x89, 1, src, dst, rex) - - case xmmRmR, blendvpd: - op := sseOpcode(i.u1) - var legPrex legacyPrefixes - var opcode uint32 - var opcodeNum uint32 - switch op { - case sseOpcodeAddps: - legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F58, 2 - case sseOpcodeAddpd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F58, 2 - case sseOpcodeAddss: - legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F58, 2 - case sseOpcodeAddsd: - legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F58, 2 - case sseOpcodeAndps: - legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F54, 2 - case sseOpcodeAndpd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F54, 2 - case sseOpcodeAndnps: - legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F55, 2 - case sseOpcodeAndnpd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F55, 2 - case sseOpcodeBlendvps: - legPrex, opcode, opcodeNum = 
legacyPrefixes0x66, 0x0F3814, 3 - case sseOpcodeBlendvpd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3815, 3 - case sseOpcodeDivps: - legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F5E, 2 - case sseOpcodeDivpd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F5E, 2 - case sseOpcodeDivss: - legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5E, 2 - case sseOpcodeDivsd: - legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F5E, 2 - case sseOpcodeMaxps: - legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F5F, 2 - case sseOpcodeMaxpd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F5F, 2 - case sseOpcodeMaxss: - legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5F, 2 - case sseOpcodeMaxsd: - legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F5F, 2 - case sseOpcodeMinps: - legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F5D, 2 - case sseOpcodeMinpd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F5D, 2 - case sseOpcodeMinss: - legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5D, 2 - case sseOpcodeMinsd: - legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F5D, 2 - case sseOpcodeMovlhps: - legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F16, 2 - case sseOpcodeMovsd: - legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F10, 2 - case sseOpcodeMulps: - legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F59, 2 - case sseOpcodeMulpd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F59, 2 - case sseOpcodeMulss: - legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F59, 2 - case sseOpcodeMulsd: - legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F59, 2 - case sseOpcodeOrpd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F56, 2 - case sseOpcodeOrps: - legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F56, 2 - case sseOpcodePackssdw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F6B, 2 - case sseOpcodePacksswb: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F63, 2 - case sseOpcodePackusdw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F382B, 3 - case sseOpcodePackuswb: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F67, 2 - case sseOpcodePaddb: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FFC, 2 - case sseOpcodePaddd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FFE, 2 - case sseOpcodePaddq: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FD4, 2 - case sseOpcodePaddw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FFD, 2 - case sseOpcodePaddsb: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FEC, 2 - case sseOpcodePaddsw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FED, 2 - case sseOpcodePaddusb: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDC, 2 - case sseOpcodePaddusw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDD, 2 - case sseOpcodePand: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDB, 2 - case sseOpcodePandn: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDF, 2 - case sseOpcodePavgb: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FE0, 2 - case sseOpcodePavgw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FE3, 2 - case sseOpcodePcmpeqb: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F74, 2 - case sseOpcodePcmpeqw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F75, 2 - case sseOpcodePcmpeqd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F76, 2 - case sseOpcodePcmpeqq: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3829, 3 - case sseOpcodePcmpgtb: - legPrex, opcode, 
opcodeNum = legacyPrefixes0x66, 0x0F64, 2 - case sseOpcodePcmpgtw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F65, 2 - case sseOpcodePcmpgtd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F66, 2 - case sseOpcodePcmpgtq: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3837, 3 - case sseOpcodePmaddwd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FF5, 2 - case sseOpcodePmaxsb: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383C, 3 - case sseOpcodePmaxsw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FEE, 2 - case sseOpcodePmaxsd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383D, 3 - case sseOpcodePmaxub: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDE, 2 - case sseOpcodePmaxuw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383E, 3 - case sseOpcodePmaxud: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383F, 3 - case sseOpcodePminsb: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3838, 3 - case sseOpcodePminsw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FEA, 2 - case sseOpcodePminsd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3839, 3 - case sseOpcodePminub: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FDA, 2 - case sseOpcodePminuw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383A, 3 - case sseOpcodePminud: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F383B, 3 - case sseOpcodePmulld: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3840, 3 - case sseOpcodePmullw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FD5, 2 - case sseOpcodePmuludq: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FF4, 2 - case sseOpcodePor: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FEB, 2 - case sseOpcodePshufb: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3800, 3 - case sseOpcodePsubb: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FF8, 2 - case sseOpcodePsubd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FFA, 2 - case sseOpcodePsubq: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FFB, 2 - case sseOpcodePsubw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FF9, 2 - case sseOpcodePsubsb: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FE8, 2 - case sseOpcodePsubsw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FE9, 2 - case sseOpcodePsubusb: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FD8, 2 - case sseOpcodePsubusw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FD9, 2 - case sseOpcodePunpckhbw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F68, 2 - case sseOpcodePunpcklbw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F60, 2 - case sseOpcodePxor: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FEF, 2 - case sseOpcodeSubps: - legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F5C, 2 - case sseOpcodeSubpd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F5C, 2 - case sseOpcodeSubss: - legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5C, 2 - case sseOpcodeSubsd: - legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F5C, 2 - case sseOpcodeXorps: - legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F57, 2 - case sseOpcodeXorpd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F57, 2 - case sseOpcodePmulhrsw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F380B, 3 - case sseOpcodeUnpcklps: - legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0F14, 2 - case sseOpcodePmaddubsw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 
0x0F3804, 3 - default: - if kind == blendvpd { - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3815, 3 - } else { - panic(fmt.Sprintf("Unsupported sseOpcode: %s", op)) - } - } - - dst := regEncodings[i.op2.reg().RealReg()] - - rex := rexInfo(0).clearW() - op1 := i.op1 - if op1.kind == operandKindReg { - src := regEncodings[op1.reg().RealReg()] - encodeRegReg(c, legPrex, opcode, opcodeNum, dst, src, rex) - } else if i.op1.kind == operandKindMem { - m := i.op1.addressMode() - encodeRegMem(c, legPrex, opcode, opcodeNum, dst, m, rex) - } else { - panic("BUG: invalid operand kind") - } - - case gprToXmm: - var legPrefix legacyPrefixes - var opcode uint32 - const opcodeNum = 2 - switch sseOpcode(i.u1) { - case sseOpcodeMovd, sseOpcodeMovq: - legPrefix, opcode = legacyPrefixes0x66, 0x0f6e - case sseOpcodeCvtsi2ss: - legPrefix, opcode = legacyPrefixes0xF3, 0x0f2a - case sseOpcodeCvtsi2sd: - legPrefix, opcode = legacyPrefixes0xF2, 0x0f2a - default: - panic(fmt.Sprintf("Unsupported sseOpcode: %s", sseOpcode(i.u1))) - } - - var rex rexInfo - if i.b1 { - rex = rex.setW() - } else { - rex = rex.clearW() - } - dst := regEncodings[i.op2.reg().RealReg()] - - op1 := i.op1 - if op1.kind == operandKindReg { - src := regEncodings[op1.reg().RealReg()] - encodeRegReg(c, legPrefix, opcode, opcodeNum, dst, src, rex) - } else if i.op1.kind == operandKindMem { - m := i.op1.addressMode() - encodeRegMem(c, legPrefix, opcode, opcodeNum, dst, m, rex) - } else { - panic("BUG: invalid operand kind") - } - - case xmmUnaryRmR: - var prefix legacyPrefixes - var opcode uint32 - var opcodeNum uint32 - op := sseOpcode(i.u1) - switch op { - case sseOpcodeCvtss2sd: - prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5A, 2 - case sseOpcodeCvtsd2ss: - prefix, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F5A, 2 - case sseOpcodeMovaps: - prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F28, 2 - case sseOpcodeMovapd: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F28, 2 - case sseOpcodeMovdqa: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F6F, 2 - case sseOpcodeMovdqu: - prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F6F, 2 - case sseOpcodeMovsd: - prefix, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F10, 2 - case sseOpcodeMovss: - prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F10, 2 - case sseOpcodeMovups: - prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F10, 2 - case sseOpcodeMovupd: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F10, 2 - case sseOpcodePabsb: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F381C, 3 - case sseOpcodePabsw: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F381D, 3 - case sseOpcodePabsd: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F381E, 3 - case sseOpcodePmovsxbd: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3821, 3 - case sseOpcodePmovsxbw: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3820, 3 - case sseOpcodePmovsxbq: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3822, 3 - case sseOpcodePmovsxwd: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3823, 3 - case sseOpcodePmovsxwq: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3824, 3 - case sseOpcodePmovsxdq: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3825, 3 - case sseOpcodePmovzxbd: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3831, 3 - case sseOpcodePmovzxbw: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3830, 3 - case sseOpcodePmovzxbq: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3832, 3 - case sseOpcodePmovzxwd: - 
prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3833, 3 - case sseOpcodePmovzxwq: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3834, 3 - case sseOpcodePmovzxdq: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3835, 3 - case sseOpcodeSqrtps: - prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F51, 2 - case sseOpcodeSqrtpd: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F51, 2 - case sseOpcodeSqrtss: - prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F51, 2 - case sseOpcodeSqrtsd: - prefix, opcode, opcodeNum = legacyPrefixes0xF2, 0x0F51, 2 - case sseOpcodeXorps: - prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F57, 2 - case sseOpcodeXorpd: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F57, 2 - case sseOpcodeCvtdq2ps: - prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F5B, 2 - case sseOpcodeCvtdq2pd: - prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0FE6, 2 - case sseOpcodeCvtps2pd: - prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0F5A, 2 - case sseOpcodeCvtpd2ps: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0F5A, 2 - case sseOpcodeCvttps2dq: - prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0F5B, 2 - case sseOpcodeCvttpd2dq: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0FE6, 2 - default: - panic(fmt.Sprintf("Unsupported sseOpcode: %s", op)) - } - - dst := regEncodings[i.op2.reg().RealReg()] - - rex := rexInfo(0).clearW() - op1 := i.op1 - if op1.kind == operandKindReg { - src := regEncodings[op1.reg().RealReg()] - encodeRegReg(c, prefix, opcode, opcodeNum, dst, src, rex) - } else if i.op1.kind == operandKindMem { - m := i.op1.addressMode() - needsLabelResolution = encodeRegMem(c, prefix, opcode, opcodeNum, dst, m, rex) - } else { - panic("BUG: invalid operand kind") - } - - case xmmUnaryRmRImm: - var prefix legacyPrefixes - var opcode uint32 - var opcodeNum uint32 - op := sseOpcode(i.u1) - switch op { - case sseOpcodeRoundps: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f3a08, 3 - case sseOpcodeRoundss: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f3a0a, 3 - case sseOpcodeRoundpd: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f3a09, 3 - case sseOpcodeRoundsd: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f3a0b, 3 - } - rex := rexInfo(0).clearW() - dst := regEncodings[i.op2.reg().RealReg()] - op1 := i.op1 - if op1.kind == operandKindReg { - src := regEncodings[op1.reg().RealReg()] - encodeRegReg(c, prefix, opcode, opcodeNum, dst, src, rex) - } else if i.op1.kind == operandKindMem { - m := i.op1.addressMode() - encodeRegMem(c, prefix, opcode, opcodeNum, dst, m, rex) - } else { - panic("BUG: invalid operand kind") - } - - c.EmitByte(byte(i.u2)) - - case unaryRmR: - var prefix legacyPrefixes - var opcode uint32 - var opcodeNum uint32 - op := unaryRmROpcode(i.u1) - // We assume size is either 32 or 64. - switch op { - case unaryRmROpcodeBsr: - prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0fbd, 2 - case unaryRmROpcodeBsf: - prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0fbc, 2 - case unaryRmROpcodeLzcnt: - prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0fbd, 2 - case unaryRmROpcodeTzcnt: - prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0fbc, 2 - case unaryRmROpcodePopcnt: - prefix, opcode, opcodeNum = legacyPrefixes0xF3, 0x0fb8, 2 - default: - panic(fmt.Sprintf("Unsupported unaryRmROpcode: %s", op)) - } - - dst := regEncodings[i.op2.reg().RealReg()] - - rex := rexInfo(0) - if i.b1 { // 64 bit. 
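// With b1 set, REX.W is emitted: e.g. `bsrq %rcx, %rax` encodes as
// 48 0f bd c1 (REX.W, 0F BD, ModRM 11/000/001); lzcnt, tzcnt and popcnt follow
// the same shape but carry the mandatory F3 prefix selected in the switch above.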
- rex = rexInfo(0).setW() - } else { - rex = rexInfo(0).clearW() - } - op1 := i.op1 - if op1.kind == operandKindReg { - src := regEncodings[op1.reg().RealReg()] - encodeRegReg(c, prefix, opcode, opcodeNum, dst, src, rex) - } else if i.op1.kind == operandKindMem { - m := i.op1.addressMode() - encodeRegMem(c, prefix, opcode, opcodeNum, dst, m, rex) - } else { - panic("BUG: invalid operand kind") - } - - case not: - var prefix legacyPrefixes - src := regEncodings[i.op1.reg().RealReg()] - rex := rexInfo(0) - if i.b1 { // 64 bit. - rex = rexInfo(0).setW() - } else { - rex = rexInfo(0).clearW() - } - subopcode := uint8(2) - encodeEncEnc(c, prefix, 0xf7, 1, subopcode, uint8(src), rex) - - case neg: - var prefix legacyPrefixes - src := regEncodings[i.op1.reg().RealReg()] - rex := rexInfo(0) - if i.b1 { // 64 bit. - rex = rexInfo(0).setW() - } else { - rex = rexInfo(0).clearW() - } - subopcode := uint8(3) - encodeEncEnc(c, prefix, 0xf7, 1, subopcode, uint8(src), rex) - - case div: - rex := rexInfo(0) - if i.b1 { // 64 bit. - rex = rexInfo(0).setW() - } else { - rex = rexInfo(0).clearW() - } - var subopcode uint8 - if i.u1 != 0 { // Signed. - subopcode = 7 - } else { - subopcode = 6 - } - - divisor := i.op1 - if divisor.kind == operandKindReg { - src := regEncodings[divisor.reg().RealReg()] - encodeEncEnc(c, legacyPrefixesNone, 0xf7, 1, subopcode, uint8(src), rex) - } else if divisor.kind == operandKindMem { - m := divisor.addressMode() - encodeEncMem(c, legacyPrefixesNone, 0xf7, 1, subopcode, m, rex) - } else { - panic("BUG: invalid operand kind") - } - - case mulHi: - var prefix legacyPrefixes - rex := rexInfo(0) - if i.b1 { // 64 bit. - rex = rexInfo(0).setW() - } else { - rex = rexInfo(0).clearW() - } - - signed := i.u1 != 0 - var subopcode uint8 - if signed { - subopcode = 5 - } else { - subopcode = 4 - } - - // src1 is implicitly rax, - // dst_lo is implicitly rax, - // dst_hi is implicitly rdx. - src2 := i.op1 - if src2.kind == operandKindReg { - src := regEncodings[src2.reg().RealReg()] - encodeEncEnc(c, prefix, 0xf7, 1, subopcode, uint8(src), rex) - } else if src2.kind == operandKindMem { - m := src2.addressMode() - encodeEncMem(c, prefix, 0xf7, 1, subopcode, m, rex) - } else { - panic("BUG: invalid operand kind") - } - - case signExtendData: - if i.b1 { // 64 bit. 
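// 0x48 0x99 is CQO (REX.W + 99), sign-extending RAX into RDX:RAX; the bare
// 0x99 in the else branch is CDQ (EAX into EDX:EAX). Either one sets up the
// implicit RDX:RAX dividend consumed by the div/idiv encoding above.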
- c.EmitByte(0x48) - c.EmitByte(0x99) - } else { - c.EmitByte(0x99) - } - case movzxRmR, movsxRmR: - signed := i.kind == movsxRmR - - ext := extMode(i.u1) - var opcode uint32 - var opcodeNum uint32 - var rex rexInfo - switch ext { - case extModeBL: - if signed { - opcode, opcodeNum, rex = 0x0fbe, 2, rex.clearW() - } else { - opcode, opcodeNum, rex = 0x0fb6, 2, rex.clearW() - } - case extModeBQ: - if signed { - opcode, opcodeNum, rex = 0x0fbe, 2, rex.setW() - } else { - opcode, opcodeNum, rex = 0x0fb6, 2, rex.setW() - } - case extModeWL: - if signed { - opcode, opcodeNum, rex = 0x0fbf, 2, rex.clearW() - } else { - opcode, opcodeNum, rex = 0x0fb7, 2, rex.clearW() - } - case extModeWQ: - if signed { - opcode, opcodeNum, rex = 0x0fbf, 2, rex.setW() - } else { - opcode, opcodeNum, rex = 0x0fb7, 2, rex.setW() - } - case extModeLQ: - if signed { - opcode, opcodeNum, rex = 0x63, 1, rex.setW() - } else { - opcode, opcodeNum, rex = 0x8b, 1, rex.clearW() - } - default: - panic("BUG: invalid extMode") - } - - op := i.op1 - dst := regEncodings[i.op2.reg().RealReg()] - switch op.kind { - case operandKindReg: - src := regEncodings[op.reg().RealReg()] - if ext == extModeBL || ext == extModeBQ { - // Some destinations must be encoded with REX.R = 1. - if e := src.encoding(); e >= 4 && e <= 7 { - rex = rex.always() - } - } - encodeRegReg(c, legacyPrefixesNone, opcode, opcodeNum, dst, src, rex) - case operandKindMem: - m := op.addressMode() - encodeRegMem(c, legacyPrefixesNone, opcode, opcodeNum, dst, m, rex) - default: - panic("BUG: invalid operand kind") - } - - case mov64MR: - m := i.op1.addressMode() - encodeLoad64(c, m, i.op2.reg().RealReg()) - - case lea: - needsLabelResolution = true - dst := regEncodings[i.op2.reg().RealReg()] - rex := rexInfo(0).setW() - const opcode, opcodeNum = 0x8d, 1 - switch i.op1.kind { - case operandKindMem: - a := i.op1.addressMode() - encodeRegMem(c, legacyPrefixesNone, opcode, opcodeNum, dst, a, rex) - case operandKindLabel: - rex.encode(c, regRexBit(byte(dst)), 0) - c.EmitByte(byte((opcode) & 0xff)) - - // Indicate "LEAQ [RIP + 32bit displacement]. - // https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing - c.EmitByte(encodeModRM(0b00, dst.encoding(), 0b101)) - - // This will be resolved later, so we just emit a placeholder (0xffffffff for testing). - c.Emit4Bytes(0xffffffff) - default: - panic("BUG: invalid operand kind") - } - - case movRM: - m := i.op2.addressMode() - src := regEncodings[i.op1.reg().RealReg()] - - var rex rexInfo - switch i.u1 { - case 1: - if e := src.encoding(); e >= 4 && e <= 7 { - rex = rex.always() - } - encodeRegMem(c, legacyPrefixesNone, 0x88, 1, src, m, rex.clearW()) - case 2: - encodeRegMem(c, legacyPrefixes0x66, 0x89, 1, src, m, rex.clearW()) - case 4: - encodeRegMem(c, legacyPrefixesNone, 0x89, 1, src, m, rex.clearW()) - case 8: - encodeRegMem(c, legacyPrefixesNone, 0x89, 1, src, m, rex.setW()) - default: - panic(fmt.Sprintf("BUG: invalid size %d: %s", i.u1, i.String())) - } - - case shiftR: - src := regEncodings[i.op2.reg().RealReg()] - amount := i.op1 - - var opcode uint32 - var prefix legacyPrefixes - rex := rexInfo(0) - if i.b1 { // 64 bit. 
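// The shiftROp value doubles as the ModRM /digit (rol=0, ror=1, shl=4, shr=5,
// sar=7): an immediate count uses opcode C1 /digit ib and a CL count uses
// D3 /digit, so e.g. `shlq $3, %rcx` encodes as 48 c1 e1 03.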
- rex = rexInfo(0).setW() - } else { - rex = rexInfo(0).clearW() - } - - switch amount.kind { - case operandKindReg: - if amount.reg() != rcxVReg { - panic("BUG: invalid reg operand: must be rcx") - } - opcode, prefix = 0xd3, legacyPrefixesNone - encodeEncEnc(c, prefix, opcode, 1, uint8(i.u1), uint8(src), rex) - case operandKindImm32: - opcode, prefix = 0xc1, legacyPrefixesNone - encodeEncEnc(c, prefix, opcode, 1, uint8(i.u1), uint8(src), rex) - c.EmitByte(byte(amount.imm32())) - default: - panic("BUG: invalid operand kind") - } - case xmmRmiReg: - const legPrefix = legacyPrefixes0x66 - rex := rexInfo(0).clearW() - dst := regEncodings[i.op2.reg().RealReg()] - - var opcode uint32 - var regDigit uint8 - - op := sseOpcode(i.u1) - op1 := i.op1 - if i.op1.kind == operandKindImm32 { - switch op { - case sseOpcodePsllw: - opcode, regDigit = 0x0f71, 6 - case sseOpcodePslld: - opcode, regDigit = 0x0f72, 6 - case sseOpcodePsllq: - opcode, regDigit = 0x0f73, 6 - case sseOpcodePsraw: - opcode, regDigit = 0x0f71, 4 - case sseOpcodePsrad: - opcode, regDigit = 0x0f72, 4 - case sseOpcodePsrlw: - opcode, regDigit = 0x0f71, 2 - case sseOpcodePsrld: - opcode, regDigit = 0x0f72, 2 - case sseOpcodePsrlq: - opcode, regDigit = 0x0f73, 2 - default: - panic("invalid opcode") - } - - encodeEncEnc(c, legPrefix, opcode, 2, regDigit, uint8(dst), rex) - imm32 := op1.imm32() - if imm32 > 0xff&imm32 { - panic("immediate value does not fit 1 byte") - } - c.EmitByte(uint8(imm32)) - } else { - switch op { - case sseOpcodePsllw: - opcode = 0x0ff1 - case sseOpcodePslld: - opcode = 0x0ff2 - case sseOpcodePsllq: - opcode = 0x0ff3 - case sseOpcodePsraw: - opcode = 0x0fe1 - case sseOpcodePsrad: - opcode = 0x0fe2 - case sseOpcodePsrlw: - opcode = 0x0fd1 - case sseOpcodePsrld: - opcode = 0x0fd2 - case sseOpcodePsrlq: - opcode = 0x0fd3 - default: - panic("invalid opcode") - } - - if op1.kind == operandKindReg { - reg := regEncodings[op1.reg().RealReg()] - encodeRegReg(c, legPrefix, opcode, 2, dst, reg, rex) - } else if op1.kind == operandKindMem { - m := op1.addressMode() - encodeRegMem(c, legPrefix, opcode, 2, dst, m, rex) - } else { - panic("BUG: invalid operand kind") - } - } - - case cmpRmiR: - var opcode uint32 - isCmp := i.u1 != 0 - rex := rexInfo(0) - _64 := i.b1 - if _64 { // 64 bit. - rex = rex.setW() - } else { - rex = rex.clearW() - } - dst := regEncodings[i.op2.reg().RealReg()] - op1 := i.op1 - switch op1.kind { - case operandKindReg: - reg := regEncodings[op1.reg().RealReg()] - if isCmp { - opcode = 0x39 - } else { - opcode = 0x85 - } - // Here we swap the encoding of the operands for CMP to be consistent with the output of LLVM/GCC. 
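
A note on the operand swap mentioned just above: x86 provides two encodings for a register-register CMP (0x39 CMP r/m, r and 0x3B CMP r, r/m), and both set the same flags. The sketch below is a standalone illustration rather than code from this file; it prints the two equivalent byte sequences for "cmp eax, ecx", and the lowering picks the 0x39 form so its output matches what LLVM/GCC emit.

package main

import "fmt"

func main() {
	// 0x39 /r (CMP r/m32, r32): ModRM reg = ecx(1), rm = eax(0).
	fmt.Printf("% x\n", []byte{0x39, 0b11_001_000}) // 39 c8
	// 0x3b /r (CMP r32, r/m32): ModRM reg = eax(0), rm = ecx(1).
	fmt.Printf("% x\n", []byte{0x3b, 0b11_000_001}) // 3b c1
}
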
- encodeRegReg(c, legacyPrefixesNone, opcode, 1, reg, dst, rex) - - case operandKindMem: - if isCmp { - opcode = 0x3b - } else { - opcode = 0x85 - } - m := op1.addressMode() - encodeRegMem(c, legacyPrefixesNone, opcode, 1, dst, m, rex) - - case operandKindImm32: - imm32 := op1.imm32() - useImm8 := isCmp && lower8willSignExtendTo32(imm32) - var subopcode uint8 - - switch { - case isCmp && useImm8: - opcode, subopcode = 0x83, 7 - case isCmp && !useImm8: - opcode, subopcode = 0x81, 7 - default: - opcode, subopcode = 0xf7, 0 - } - encodeEncEnc(c, legacyPrefixesNone, opcode, 1, subopcode, uint8(dst), rex) - if useImm8 { - c.EmitByte(uint8(imm32)) - } else { - c.Emit4Bytes(imm32) - } - - default: - panic("BUG: invalid operand kind") - } - case setcc: - cc := cond(i.u1) - dst := regEncodings[i.op2.reg().RealReg()] - rex := rexInfo(0).clearW().always() - opcode := uint32(0x0f90) + uint32(cc) - encodeEncEnc(c, legacyPrefixesNone, opcode, 2, 0, uint8(dst), rex) - case cmove: - cc := cond(i.u1) - dst := regEncodings[i.op2.reg().RealReg()] - rex := rexInfo(0) - if i.b1 { // 64 bit. - rex = rex.setW() - } else { - rex = rex.clearW() - } - opcode := uint32(0x0f40) + uint32(cc) - src := i.op1 - switch src.kind { - case operandKindReg: - srcReg := regEncodings[src.reg().RealReg()] - encodeRegReg(c, legacyPrefixesNone, opcode, 2, dst, srcReg, rex) - case operandKindMem: - m := src.addressMode() - encodeRegMem(c, legacyPrefixesNone, opcode, 2, dst, m, rex) - default: - panic("BUG: invalid operand kind") - } - case push64: - op := i.op1 - - switch op.kind { - case operandKindReg: - dst := regEncodings[op.reg().RealReg()] - if dst.rexBit() > 0 { - c.EmitByte(rexEncodingDefault | 0x1) - } - c.EmitByte(0x50 | dst.encoding()) - case operandKindMem: - m := op.addressMode() - encodeRegMem( - c, legacyPrefixesNone, 0xff, 1, regEnc(6), m, rexInfo(0).clearW(), - ) - case operandKindImm32: - c.EmitByte(0x68) - c.Emit4Bytes(op.imm32()) - default: - panic("BUG: invalid operand kind") - } - - case pop64: - dst := regEncodings[i.op1.reg().RealReg()] - if dst.rexBit() > 0 { - c.EmitByte(rexEncodingDefault | 0x1) - } - c.EmitByte(0x58 | dst.encoding()) - - case xmmMovRM: - var legPrefix legacyPrefixes - var opcode uint32 - const opcodeNum = 2 - switch sseOpcode(i.u1) { - case sseOpcodeMovaps: - legPrefix, opcode = legacyPrefixesNone, 0x0f29 - case sseOpcodeMovapd: - legPrefix, opcode = legacyPrefixes0x66, 0x0f29 - case sseOpcodeMovdqa: - legPrefix, opcode = legacyPrefixes0x66, 0x0f7f - case sseOpcodeMovdqu: - legPrefix, opcode = legacyPrefixes0xF3, 0x0f7f - case sseOpcodeMovss: - legPrefix, opcode = legacyPrefixes0xF3, 0x0f11 - case sseOpcodeMovsd: - legPrefix, opcode = legacyPrefixes0xF2, 0x0f11 - case sseOpcodeMovups: - legPrefix, opcode = legacyPrefixesNone, 0x0f11 - case sseOpcodeMovupd: - legPrefix, opcode = legacyPrefixes0x66, 0x0f11 - default: - panic(fmt.Sprintf("Unsupported sseOpcode: %s", sseOpcode(i.u1))) - } - - dst := regEncodings[i.op1.reg().RealReg()] - encodeRegMem(c, legPrefix, opcode, opcodeNum, dst, i.op2.addressMode(), rexInfo(0).clearW()) - case xmmLoadConst: - panic("TODO") - case xmmToGpr: - var legPrefix legacyPrefixes - var opcode uint32 - var argSwap bool - const opcodeNum = 2 - switch sseOpcode(i.u1) { - case sseOpcodeMovd, sseOpcodeMovq: - legPrefix, opcode, argSwap = legacyPrefixes0x66, 0x0f7e, false - case sseOpcodeMovmskps: - legPrefix, opcode, argSwap = legacyPrefixesNone, 0x0f50, true - case sseOpcodeMovmskpd: - legPrefix, opcode, argSwap = legacyPrefixes0x66, 0x0f50, true - case 
sseOpcodePmovmskb: - legPrefix, opcode, argSwap = legacyPrefixes0x66, 0x0fd7, true - case sseOpcodeCvttss2si: - legPrefix, opcode, argSwap = legacyPrefixes0xF3, 0x0f2c, true - case sseOpcodeCvttsd2si: - legPrefix, opcode, argSwap = legacyPrefixes0xF2, 0x0f2c, true - default: - panic(fmt.Sprintf("Unsupported sseOpcode: %s", sseOpcode(i.u1))) - } - - var rex rexInfo - if i.b1 { - rex = rex.setW() - } else { - rex = rex.clearW() - } - src := regEncodings[i.op1.reg().RealReg()] - dst := regEncodings[i.op2.reg().RealReg()] - if argSwap { - src, dst = dst, src - } - encodeRegReg(c, legPrefix, opcode, opcodeNum, src, dst, rex) - - case cvtUint64ToFloatSeq: - panic("TODO") - case cvtFloatToSintSeq: - panic("TODO") - case cvtFloatToUintSeq: - panic("TODO") - case xmmMinMaxSeq: - panic("TODO") - case xmmCmpRmR: - var prefix legacyPrefixes - var opcode uint32 - var opcodeNum uint32 - rex := rexInfo(0) - _64 := i.b1 - if _64 { // 64 bit. - rex = rex.setW() - } else { - rex = rex.clearW() - } - - op := sseOpcode(i.u1) - switch op { - case sseOpcodePtest: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f3817, 3 - case sseOpcodeUcomisd: - prefix, opcode, opcodeNum = legacyPrefixes0x66, 0x0f2e, 2 - case sseOpcodeUcomiss: - prefix, opcode, opcodeNum = legacyPrefixesNone, 0x0f2e, 2 - default: - panic(fmt.Sprintf("Unsupported sseOpcode: %s", op)) - } - - dst := regEncodings[i.op2.reg().RealReg()] - op1 := i.op1 - switch op1.kind { - case operandKindReg: - reg := regEncodings[op1.reg().RealReg()] - encodeRegReg(c, prefix, opcode, opcodeNum, dst, reg, rex) - - case operandKindMem: - m := op1.addressMode() - encodeRegMem(c, prefix, opcode, opcodeNum, dst, m, rex) - - default: - panic("BUG: invalid operand kind") - } - case xmmRmRImm: - op := sseOpcode(i.u1) - var legPrex legacyPrefixes - var opcode uint32 - var opcodeNum uint32 - var swap bool - switch op { - case sseOpcodeCmpps: - legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0FC2, 2 - case sseOpcodeCmppd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FC2, 2 - case sseOpcodeCmpss: - legPrex, opcode, opcodeNum = legacyPrefixes0xF3, 0x0FC2, 2 - case sseOpcodeCmpsd: - legPrex, opcode, opcodeNum = legacyPrefixes0xF2, 0x0FC2, 2 - case sseOpcodeInsertps: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A21, 3 - case sseOpcodePalignr: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A0F, 3 - case sseOpcodePinsrb: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A20, 3 - case sseOpcodePinsrw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FC4, 2 - case sseOpcodePinsrd, sseOpcodePinsrq: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A22, 3 - case sseOpcodePextrb: - swap = true - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A14, 3 - case sseOpcodePextrw: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0FC5, 2 - case sseOpcodePextrd, sseOpcodePextrq: - swap = true - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A16, 3 - case sseOpcodePshufd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F70, 2 - case sseOpcodeRoundps: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A08, 3 - case sseOpcodeRoundpd: - legPrex, opcode, opcodeNum = legacyPrefixes0x66, 0x0F3A09, 3 - case sseOpcodeShufps: - legPrex, opcode, opcodeNum = legacyPrefixesNone, 0x0FC6, 2 - default: - panic(fmt.Sprintf("Unsupported sseOpcode: %s", op)) - } - - dst := regEncodings[i.op2.reg().RealReg()] - - var rex rexInfo - if op == sseOpcodePextrq || op == sseOpcodePinsrq { - rex = rexInfo(0).setW() - } else { - rex = 
rexInfo(0).clearW() - } - op1 := i.op1 - if op1.kind == operandKindReg { - src := regEncodings[op1.reg().RealReg()] - if swap { - src, dst = dst, src - } - encodeRegReg(c, legPrex, opcode, opcodeNum, dst, src, rex) - } else if i.op1.kind == operandKindMem { - if swap { - panic("BUG: this is not possible to encode") - } - m := i.op1.addressMode() - encodeRegMem(c, legPrex, opcode, opcodeNum, dst, m, rex) - } else { - panic("BUG: invalid operand kind") - } - - c.EmitByte(byte(i.u2)) - - case jmp: - const ( - regMemOpcode = 0xff - regMemOpcodeNum = 1 - regMemSubOpcode = 4 - ) - op := i.op1 - switch op.kind { - case operandKindLabel: - needsLabelResolution = true - fallthrough - case operandKindImm32: - c.EmitByte(0xe9) - c.Emit4Bytes(op.imm32()) - case operandKindMem: - m := op.addressMode() - encodeRegMem(c, - legacyPrefixesNone, - regMemOpcode, regMemOpcodeNum, - regMemSubOpcode, m, rexInfo(0).clearW(), - ) - case operandKindReg: - r := op.reg().RealReg() - encodeRegReg( - c, - legacyPrefixesNone, - regMemOpcode, regMemOpcodeNum, - regMemSubOpcode, - regEncodings[r], rexInfo(0).clearW(), - ) - default: - panic("BUG: invalid operand kind") - } - - case jmpIf: - op := i.op1 - switch op.kind { - case operandKindLabel: - needsLabelResolution = true - fallthrough - case operandKindImm32: - c.EmitByte(0x0f) - c.EmitByte(0x80 | cond(i.u1).encoding()) - c.Emit4Bytes(op.imm32()) - default: - panic("BUG: invalid operand kind") - } - - case jmpTableIsland: - needsLabelResolution = true - for tc := uint64(0); tc < i.u2; tc++ { - c.Emit8Bytes(0) - } - - case exitSequence: - execCtx := i.op1.reg() - allocatedAmode := i.op2.addressMode() - - // Restore the RBP, RSP, and return to the Go code: - *allocatedAmode = amode{ - kindWithShift: uint32(amodeImmReg), base: execCtx, - imm32: wazevoapi.ExecutionContextOffsetOriginalFramePointer.U32(), - } - encodeLoad64(c, allocatedAmode, rbp) - allocatedAmode.imm32 = wazevoapi.ExecutionContextOffsetOriginalStackPointer.U32() - encodeLoad64(c, allocatedAmode, rsp) - encodeRet(c) - - case ud2: - c.EmitByte(0x0f) - c.EmitByte(0x0b) - - case call: - c.EmitByte(0xe8) - // Meaning that the call target is a function value, and requires relocation. - c.AddRelocationInfo(ssa.FuncRef(i.u1)) - // Note that this is zero as a placeholder for the call target if it's a function value. - c.Emit4Bytes(uint32(i.u2)) - - case callIndirect: - op := i.op1 - - const opcodeNum = 1 - const opcode = 0xff - rex := rexInfo(0).clearW() - switch op.kind { - case operandKindReg: - dst := regEncodings[op.reg().RealReg()] - encodeRegReg(c, - legacyPrefixesNone, - opcode, opcodeNum, - regEnc(2), - dst, - rex, - ) - case operandKindMem: - m := op.addressMode() - encodeRegMem(c, - legacyPrefixesNone, - opcode, opcodeNum, - regEnc(2), - m, - rex, - ) - default: - panic("BUG: invalid operand kind") - } - - case xchg: - src, dst := regEncodings[i.op1.reg().RealReg()], i.op2 - size := i.u1 - - var rex rexInfo - var opcode uint32 - lp := legacyPrefixesNone - switch size { - case 8: - opcode = 0x87 - rex = rexInfo(0).setW() - case 4: - opcode = 0x87 - rex = rexInfo(0).clearW() - case 2: - lp = legacyPrefixes0x66 - opcode = 0x87 - rex = rexInfo(0).clearW() - case 1: - opcode = 0x86 - if i.op2.kind == operandKindReg { - panic("TODO?: xchg on two 1-byte registers") - } - // Some destinations must be encoded with REX.R = 1. 
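
The byte-register special case noted above comes from a REX quirk: in an 8-bit operand, register encodings 4-7 select AH/CH/DH/BH when no REX prefix is present, but SPL/BPL/SIL/DIL when any REX prefix (even a bare 0x40) is emitted, which is why those sources force the prefix. A minimal standalone illustration of the two readings of the same ModRM byte (example bytes only, not produced by this file):

package main

import "fmt"

func main() {
	modRM := byte(0b11_100_000) // mod=11 (register direct), reg=4, rm=0 (AL)
	fmt.Printf("% x\n", []byte{0x88, modRM})       // 88 e0: mov al, ah (no REX)
	fmt.Printf("% x\n", []byte{0x40, 0x88, modRM}) // 40 88 e0: mov al, spl (REX present)
}
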
- if e := src.encoding(); e >= 4 && e <= 7 { - rex = rexInfo(0).always() - } - default: - panic(fmt.Sprintf("BUG: invalid size %d: %s", size, i.String())) - } - - switch dst.kind { - case operandKindMem: - m := dst.addressMode() - encodeRegMem(c, lp, opcode, 1, src, m, rex) - case operandKindReg: - r := dst.reg().RealReg() - encodeRegReg(c, lp, opcode, 1, src, regEncodings[r], rex) - default: - panic("BUG: invalid operand kind") - } - - case lockcmpxchg: - src, dst := regEncodings[i.op1.reg().RealReg()], i.op2 - size := i.u1 - - var rex rexInfo - var opcode uint32 - lp := legacyPrefixes0xF0 // Lock prefix. - switch size { - case 8: - opcode = 0x0FB1 - rex = rexInfo(0).setW() - case 4: - opcode = 0x0FB1 - rex = rexInfo(0).clearW() - case 2: - lp = legacyPrefixes0x660xF0 // Legacy prefix + Lock prefix. - opcode = 0x0FB1 - rex = rexInfo(0).clearW() - case 1: - opcode = 0x0FB0 - // Some destinations must be encoded with REX.R = 1. - if e := src.encoding(); e >= 4 && e <= 7 { - rex = rexInfo(0).always() - } - default: - panic(fmt.Sprintf("BUG: invalid size %d: %s", size, i.String())) - } - - switch dst.kind { - case operandKindMem: - m := dst.addressMode() - encodeRegMem(c, lp, opcode, 2, src, m, rex) - default: - panic("BUG: invalid operand kind") - } - - case lockxadd: - src, dst := regEncodings[i.op1.reg().RealReg()], i.op2 - size := i.u1 - - var rex rexInfo - var opcode uint32 - lp := legacyPrefixes0xF0 // Lock prefix. - switch size { - case 8: - opcode = 0x0FC1 - rex = rexInfo(0).setW() - case 4: - opcode = 0x0FC1 - rex = rexInfo(0).clearW() - case 2: - lp = legacyPrefixes0x660xF0 // Legacy prefix + Lock prefix. - opcode = 0x0FC1 - rex = rexInfo(0).clearW() - case 1: - opcode = 0x0FC0 - // Some destinations must be encoded with REX.R = 1. - if e := src.encoding(); e >= 4 && e <= 7 { - rex = rexInfo(0).always() - } - default: - panic(fmt.Sprintf("BUG: invalid size %d: %s", size, i.String())) - } - - switch dst.kind { - case operandKindMem: - m := dst.addressMode() - encodeRegMem(c, lp, opcode, 2, src, m, rex) - default: - panic("BUG: invalid operand kind") - } - - case zeros: - r := i.op2.reg() - if r.RegType() == regalloc.RegTypeInt { - i.asAluRmiR(aluRmiROpcodeXor, newOperandReg(r), r, true) - } else { - i.asXmmRmR(sseOpcodePxor, newOperandReg(r), r) - } - i.encode(c) - - case mfence: - // https://www.felixcloutier.com/x86/mfence - c.EmitByte(0x0f) - c.EmitByte(0xae) - c.EmitByte(0xf0) - - default: - panic(fmt.Sprintf("TODO: %v", i.kind)) - } - return -} - -func encodeLoad64(c backend.Compiler, m *amode, rd regalloc.RealReg) { - dst := regEncodings[rd] - encodeRegMem(c, legacyPrefixesNone, 0x8b, 1, dst, m, rexInfo(0).setW()) -} - -func encodeRet(c backend.Compiler) { - c.EmitByte(0xc3) -} - -func encodeEncEnc( - c backend.Compiler, - legPrefixes legacyPrefixes, - opcodes uint32, - opcodeNum uint32, - r uint8, - rm uint8, - rex rexInfo, -) { - legPrefixes.encode(c) - rex.encode(c, r>>3, rm>>3) - - for opcodeNum > 0 { - opcodeNum-- - c.EmitByte(byte((opcodes >> (opcodeNum << 3)) & 0xff)) - } - c.EmitByte(encodeModRM(3, r&7, rm&7)) -} - -func encodeRegReg( - c backend.Compiler, - legPrefixes legacyPrefixes, - opcodes uint32, - opcodeNum uint32, - r regEnc, - rm regEnc, - rex rexInfo, -) { - encodeEncEnc(c, legPrefixes, opcodes, opcodeNum, uint8(r), uint8(rm), rex) -} - -func encodeModRM(mod byte, reg byte, rm byte) byte { - return mod<<6 | reg<<3 | rm -} - -func encodeSIB(shift byte, encIndex byte, encBase byte) byte { - return shift<<6 | encIndex<<3 | encBase -} - -func encodeRegMem( - c 
backend.Compiler, legPrefixes legacyPrefixes, opcodes uint32, opcodeNum uint32, r regEnc, m *amode, rex rexInfo, -) (needsLabelResolution bool) { - needsLabelResolution = encodeEncMem(c, legPrefixes, opcodes, opcodeNum, uint8(r), m, rex) - return -} - -func encodeEncMem( - c backend.Compiler, legPrefixes legacyPrefixes, opcodes uint32, opcodeNum uint32, r uint8, m *amode, rex rexInfo, -) (needsLabelResolution bool) { - legPrefixes.encode(c) - - const ( - modNoDisplacement = 0b00 - modShortDisplacement = 0b01 - modLongDisplacement = 0b10 - - useSBI = 4 // the encoding of rsp or r12 register. - ) - - switch m.kind() { - case amodeImmReg, amodeImmRBP: - base := m.base.RealReg() - baseEnc := regEncodings[base] - - rex.encode(c, regRexBit(r), baseEnc.rexBit()) - - for opcodeNum > 0 { - opcodeNum-- - c.EmitByte(byte((opcodes >> (opcodeNum << 3)) & 0xff)) - } - - // SIB byte is the last byte of the memory encoding before the displacement - const sibByte = 0x24 // == encodeSIB(0, 4, 4) - - immZero, baseRbp, baseR13 := m.imm32 == 0, base == rbp, base == r13 - short := lower8willSignExtendTo32(m.imm32) - rspOrR12 := base == rsp || base == r12 - - if immZero && !baseRbp && !baseR13 { // rbp or r13 can't be used as base for without displacement encoding. - c.EmitByte(encodeModRM(modNoDisplacement, regEncoding(r), baseEnc.encoding())) - if rspOrR12 { - c.EmitByte(sibByte) - } - } else if short { // Note: this includes the case where m.imm32 == 0 && base == rbp || base == r13. - c.EmitByte(encodeModRM(modShortDisplacement, regEncoding(r), baseEnc.encoding())) - if rspOrR12 { - c.EmitByte(sibByte) - } - c.EmitByte(byte(m.imm32)) - } else { - c.EmitByte(encodeModRM(modLongDisplacement, regEncoding(r), baseEnc.encoding())) - if rspOrR12 { - c.EmitByte(sibByte) - } - c.Emit4Bytes(m.imm32) - } - - case amodeRegRegShift: - base := m.base.RealReg() - baseEnc := regEncodings[base] - index := m.index.RealReg() - indexEnc := regEncodings[index] - - if index == rsp { - panic("BUG: rsp can't be used as index of addressing mode") - } - - rex.encodeForIndex(c, regEnc(r), indexEnc, baseEnc) - - for opcodeNum > 0 { - opcodeNum-- - c.EmitByte(byte((opcodes >> (opcodeNum << 3)) & 0xff)) - } - - immZero, baseRbp, baseR13 := m.imm32 == 0, base == rbp, base == r13 - if immZero && !baseRbp && !baseR13 { // rbp or r13 can't be used as base for without displacement encoding. (curious why? because it's interpreted as RIP relative addressing). - c.EmitByte(encodeModRM(modNoDisplacement, regEncoding(r), useSBI)) - c.EmitByte(encodeSIB(m.shift(), indexEnc.encoding(), baseEnc.encoding())) - } else if lower8willSignExtendTo32(m.imm32) { - c.EmitByte(encodeModRM(modShortDisplacement, regEncoding(r), useSBI)) - c.EmitByte(encodeSIB(m.shift(), indexEnc.encoding(), baseEnc.encoding())) - c.EmitByte(byte(m.imm32)) - } else { - c.EmitByte(encodeModRM(modLongDisplacement, regEncoding(r), useSBI)) - c.EmitByte(encodeSIB(m.shift(), indexEnc.encoding(), baseEnc.encoding())) - c.Emit4Bytes(m.imm32) - } - - case amodeRipRel: - rex.encode(c, regRexBit(r), 0) - for opcodeNum > 0 { - opcodeNum-- - c.EmitByte(byte((opcodes >> (opcodeNum << 3)) & 0xff)) - } - - // Indicate "LEAQ [RIP + 32bit displacement]. - // https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing - c.EmitByte(encodeModRM(0b00, regEncoding(r), 0b101)) - - // This will be resolved later, so we just emit a placeholder. 
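
To make the mod selection above concrete: mod=00 means no displacement, mod=01 an 8-bit sign-extended displacement, and mod=10 a 32-bit one; rm=100 (rsp/r12) always requires a SIB byte, and rm=101 with mod=00 is read as RIP-relative, which is why rbp/r13 as a base is forced into the disp8 form even for a zero offset. The reg field either names a register operand or, for /digit instructions such as the 0xF7 group earlier in this file, an opcode extension. A self-contained sketch of the same byte arithmetic, using only the standard tables rather than any wazero API:

package main

import "fmt"

// modRM packs the x86-64 ModRM byte the same way encodeModRM above does: mod<<6 | reg<<3 | rm.
func modRM(mod, reg, rm byte) byte { return mod<<6 | reg<<3 | rm }

func main() {
	// mov rax, [rbx]: REX.W (0x48), opcode 0x8b, mod=00, reg=rax(0), rm=rbx(3).
	fmt.Printf("% x\n", []byte{0x48, 0x8b, modRM(0b00, 0, 3)}) // 48 8b 03

	// mov rax, [rbp]: rbp with mod=00 would be read as RIP-relative, so a zero disp8 is emitted.
	fmt.Printf("% x\n", []byte{0x48, 0x8b, modRM(0b01, 0, 5), 0x00}) // 48 8b 45 00
}
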
- needsLabelResolution = true - c.Emit4Bytes(0) - - default: - panic("BUG: invalid addressing mode") - } - return -} - -const ( - rexEncodingDefault byte = 0x40 - rexEncodingW = rexEncodingDefault | 0x08 -) - -// rexInfo is a bit set to indicate: -// -// 0x01: W bit must be cleared. -// 0x02: REX prefix must be emitted. -type rexInfo byte - -func (ri rexInfo) setW() rexInfo { - return ri | 0x01 -} - -func (ri rexInfo) clearW() rexInfo { - return ri & 0x02 -} - -func (ri rexInfo) always() rexInfo { - return ri | 0x02 -} - -func (ri rexInfo) notAlways() rexInfo { //nolint - return ri & 0x01 -} - -func (ri rexInfo) encode(c backend.Compiler, r uint8, b uint8) { - var w byte = 0 - if ri&0x01 != 0 { - w = 0x01 - } - rex := rexEncodingDefault | w<<3 | r<<2 | b - if rex != rexEncodingDefault || ri&0x02 != 0 { - c.EmitByte(rex) - } -} - -func (ri rexInfo) encodeForIndex(c backend.Compiler, encR regEnc, encIndex regEnc, encBase regEnc) { - var w byte = 0 - if ri&0x01 != 0 { - w = 0x01 - } - r := encR.rexBit() - x := encIndex.rexBit() - b := encBase.rexBit() - rex := byte(0x40) | w<<3 | r<<2 | x<<1 | b - if rex != 0x40 || ri&0x02 != 0 { - c.EmitByte(rex) - } -} - -type regEnc byte - -func (r regEnc) rexBit() byte { - return regRexBit(byte(r)) -} - -func (r regEnc) encoding() byte { - return regEncoding(byte(r)) -} - -func regRexBit(r byte) byte { - return r >> 3 -} - -func regEncoding(r byte) byte { - return r & 0x07 -} - -var regEncodings = [...]regEnc{ - rax: 0b000, - rcx: 0b001, - rdx: 0b010, - rbx: 0b011, - rsp: 0b100, - rbp: 0b101, - rsi: 0b110, - rdi: 0b111, - r8: 0b1000, - r9: 0b1001, - r10: 0b1010, - r11: 0b1011, - r12: 0b1100, - r13: 0b1101, - r14: 0b1110, - r15: 0b1111, - xmm0: 0b000, - xmm1: 0b001, - xmm2: 0b010, - xmm3: 0b011, - xmm4: 0b100, - xmm5: 0b101, - xmm6: 0b110, - xmm7: 0b111, - xmm8: 0b1000, - xmm9: 0b1001, - xmm10: 0b1010, - xmm11: 0b1011, - xmm12: 0b1100, - xmm13: 0b1101, - xmm14: 0b1110, - xmm15: 0b1111, -} - -type legacyPrefixes byte - -const ( - legacyPrefixesNone legacyPrefixes = iota - legacyPrefixes0x66 - legacyPrefixes0xF0 - legacyPrefixes0x660xF0 - legacyPrefixes0xF2 - legacyPrefixes0xF3 -) - -func (p legacyPrefixes) encode(c backend.Compiler) { - switch p { - case legacyPrefixesNone: - case legacyPrefixes0x66: - c.EmitByte(0x66) - case legacyPrefixes0xF0: - c.EmitByte(0xf0) - case legacyPrefixes0x660xF0: - c.EmitByte(0x66) - c.EmitByte(0xf0) - case legacyPrefixes0xF2: - c.EmitByte(0xf2) - case legacyPrefixes0xF3: - c.EmitByte(0xf3) - default: - panic("BUG: invalid legacy prefix") - } -} - -func lower32willSignExtendTo64(x uint64) bool { - xs := int64(x) - return xs == int64(uint64(int32(xs))) -} - -func lower8willSignExtendTo32(x uint32) bool { - xs := int32(x) - return xs == ((xs << 24) >> 24) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_constant.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_constant.go deleted file mode 100644 index 55d05ef63..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_constant.go +++ /dev/null @@ -1,71 +0,0 @@ -package amd64 - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -// lowerConstant allocates a new VReg and inserts the instruction to load the constant value. 
-func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) { - val := instr.Return() - valType := val.Type() - - vr = m.c.AllocateVReg(valType) - m.insertLoadConstant(instr, vr) - return -} - -// InsertLoadConstantBlockArg implements backend.Machine. -func (m *machine) InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) { - m.insertLoadConstant(instr, vr) -} - -func (m *machine) insertLoadConstant(instr *ssa.Instruction, vr regalloc.VReg) { - val := instr.Return() - valType := val.Type() - v := instr.ConstantVal() - - bits := valType.Bits() - if bits < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc. - v = v & ((1 << valType.Bits()) - 1) - } - - switch valType { - case ssa.TypeF32, ssa.TypeF64: - m.lowerFconst(vr, v, bits == 64) - case ssa.TypeI32, ssa.TypeI64: - m.lowerIconst(vr, v, bits == 64) - default: - panic("BUG") - } -} - -func (m *machine) lowerFconst(dst regalloc.VReg, c uint64, _64 bool) { - if c == 0 { - xor := m.allocateInstr().asZeros(dst) - m.insert(xor) - } else { - var tmpType ssa.Type - if _64 { - tmpType = ssa.TypeI64 - } else { - tmpType = ssa.TypeI32 - } - tmpInt := m.c.AllocateVReg(tmpType) - loadToGP := m.allocateInstr().asImm(tmpInt, c, _64) - m.insert(loadToGP) - - movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpInt), dst, _64) - m.insert(movToXmm) - } -} - -func (m *machine) lowerIconst(dst regalloc.VReg, c uint64, _64 bool) { - i := m.allocateInstr() - if c == 0 { - i.asZeros(dst) - } else { - i.asImm(dst, c, _64) - } - m.insert(i) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go deleted file mode 100644 index befe8c643..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go +++ /dev/null @@ -1,187 +0,0 @@ -package amd64 - -import ( - "fmt" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -var addendsMatchOpcodes = [...]ssa.Opcode{ssa.OpcodeUExtend, ssa.OpcodeSExtend, ssa.OpcodeIadd, ssa.OpcodeIconst, ssa.OpcodeIshl} - -type addend struct { - r regalloc.VReg - off int64 - shift byte -} - -func (a addend) String() string { - return fmt.Sprintf("addend{r=%s, off=%d, shift=%d}", a.r, a.off, a.shift) -} - -// lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions. -func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32) (am *amode) { - def := m.c.ValueDefinition(ptr) - - if offsetBase&0x80000000 != 0 { - // Special casing the huge base offset whose MSB is set. In x64, the immediate is always - // sign-extended, but our IR semantics requires the offset base is always unsigned. - // Note that this should be extremely rare or even this shouldn't hit in the real application, - // therefore we don't need to optimize this case in my opinion. 
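
A hypothetical number to illustrate the comment above: the x86-64 displacement field is a signed 32-bit value that the CPU sign-extends to 64 bits, while the Wasm offset base is unsigned, so an offset whose MSB is set cannot be folded into the displacement and has to be materialized in a register instead.

package main

import "fmt"

func main() {
	offsetBase := uint32(0x8000_0010) // MSB set: still a valid unsigned Wasm offset.
	asDisp32 := int64(int32(offsetBase)) // what the CPU would actually add after sign extension
	fmt.Printf("wanted +%#x, a disp32 would add %#x\n", uint64(offsetBase), uint64(asDisp32))
	// wanted +0x80000010, a disp32 would add 0xffffffff80000010
}
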
- - a := m.lowerAddend(def) - off64 := a.off + int64(offsetBase) - offsetBaseReg := m.c.AllocateVReg(ssa.TypeI64) - m.lowerIconst(offsetBaseReg, uint64(off64), true) - if a.r != regalloc.VRegInvalid { - return m.newAmodeRegRegShift(0, offsetBaseReg, a.r, a.shift) - } else { - return m.newAmodeImmReg(0, offsetBaseReg) - } - } - - if op := m.c.MatchInstrOneOf(def, addendsMatchOpcodes[:]); op == ssa.OpcodeIadd { - add := def.Instr - x, y := add.Arg2() - xDef, yDef := m.c.ValueDefinition(x), m.c.ValueDefinition(y) - ax := m.lowerAddend(xDef) - ay := m.lowerAddend(yDef) - add.MarkLowered() - return m.lowerAddendsToAmode(ax, ay, offsetBase) - } else { - // If it is not an Iadd, then we lower the one addend. - a := m.lowerAddend(def) - // off is always 0 if r is valid. - if a.r != regalloc.VRegInvalid { - if a.shift != 0 { - tmpReg := m.c.AllocateVReg(ssa.TypeI64) - m.lowerIconst(tmpReg, 0, true) - return m.newAmodeRegRegShift(offsetBase, tmpReg, a.r, a.shift) - } - return m.newAmodeImmReg(offsetBase, a.r) - } else { - off64 := a.off + int64(offsetBase) - tmpReg := m.c.AllocateVReg(ssa.TypeI64) - m.lowerIconst(tmpReg, uint64(off64), true) - return m.newAmodeImmReg(0, tmpReg) - } - } -} - -func (m *machine) lowerAddendsToAmode(x, y addend, offBase uint32) *amode { - if x.r != regalloc.VRegInvalid && x.off != 0 || y.r != regalloc.VRegInvalid && y.off != 0 { - panic("invalid input") - } - - u64 := uint64(x.off+y.off) + uint64(offBase) - if u64 != 0 { - if _, ok := asImm32(u64, false); !ok { - tmpReg := m.c.AllocateVReg(ssa.TypeI64) - m.lowerIconst(tmpReg, u64, true) - // Blank u64 as it has been already lowered. - u64 = 0 - - if x.r == regalloc.VRegInvalid { - x.r = tmpReg - } else if y.r == regalloc.VRegInvalid { - y.r = tmpReg - } else { - // We already know that either rx or ry is invalid, - // so we overwrite it with the temporary register. - panic("BUG") - } - } - } - - u32 := uint32(u64) - switch { - // We assume rx, ry are valid iff offx, offy are 0. - case x.r != regalloc.VRegInvalid && y.r != regalloc.VRegInvalid: - switch { - case x.shift != 0 && y.shift != 0: - // Cannot absorb two shifted registers, must lower one to a shift instruction. - shifted := m.allocateInstr() - shifted.asShiftR(shiftROpShiftLeft, newOperandImm32(uint32(x.shift)), x.r, true) - m.insert(shifted) - - return m.newAmodeRegRegShift(u32, x.r, y.r, y.shift) - case x.shift != 0 && y.shift == 0: - // Swap base and index. - x, y = y, x - fallthrough - default: - return m.newAmodeRegRegShift(u32, x.r, y.r, y.shift) - } - case x.r == regalloc.VRegInvalid && y.r != regalloc.VRegInvalid: - x, y = y, x - fallthrough - case x.r != regalloc.VRegInvalid && y.r == regalloc.VRegInvalid: - if x.shift != 0 { - zero := m.c.AllocateVReg(ssa.TypeI64) - m.lowerIconst(zero, 0, true) - return m.newAmodeRegRegShift(u32, zero, x.r, x.shift) - } - return m.newAmodeImmReg(u32, x.r) - default: // Both are invalid: use the offset. - tmpReg := m.c.AllocateVReg(ssa.TypeI64) - m.lowerIconst(tmpReg, u64, true) - return m.newAmodeImmReg(0, tmpReg) - } -} - -func (m *machine) lowerAddend(x backend.SSAValueDefinition) addend { - if !x.IsFromInstr() { - return addend{m.c.VRegOf(x.V), 0, 0} - } - // Ensure the addend is not referenced in multiple places; we will discard nested Iadds. 
- op := m.c.MatchInstrOneOf(x, addendsMatchOpcodes[:]) - if op != ssa.OpcodeInvalid && op != ssa.OpcodeIadd { - return m.lowerAddendFromInstr(x.Instr) - } - p := m.getOperand_Reg(x) - return addend{p.reg(), 0, 0} -} - -// lowerAddendFromInstr takes an instruction returns a Vreg and an offset that can be used in an address mode. -// The Vreg is regalloc.VRegInvalid if the addend cannot be lowered to a register. -// The offset is 0 if the addend can be lowered to a register. -func (m *machine) lowerAddendFromInstr(instr *ssa.Instruction) addend { - instr.MarkLowered() - switch op := instr.Opcode(); op { - case ssa.OpcodeIconst: - u64 := instr.ConstantVal() - if instr.Return().Type().Bits() == 32 { - return addend{regalloc.VRegInvalid, int64(int32(u64)), 0} // sign-extend. - } else { - return addend{regalloc.VRegInvalid, int64(u64), 0} - } - case ssa.OpcodeUExtend, ssa.OpcodeSExtend: - input := instr.Arg() - inputDef := m.c.ValueDefinition(input) - if input.Type().Bits() != 32 { - panic("BUG: invalid input type " + input.Type().String()) - } - constInst := inputDef.IsFromInstr() && inputDef.Instr.Constant() - switch { - case constInst && op == ssa.OpcodeSExtend: - return addend{regalloc.VRegInvalid, int64(uint32(inputDef.Instr.ConstantVal())), 0} - case constInst && op == ssa.OpcodeUExtend: - return addend{regalloc.VRegInvalid, int64(int32(inputDef.Instr.ConstantVal())), 0} // sign-extend! - default: - r := m.getOperand_Reg(inputDef) - return addend{r.reg(), 0, 0} - } - case ssa.OpcodeIshl: - // If the addend is a shift, we can only handle it if the shift amount is a constant. - x, amount := instr.Arg2() - amountDef := m.c.ValueDefinition(amount) - if amountDef.IsFromInstr() && amountDef.Instr.Constant() && amountDef.Instr.ConstantVal() <= 3 { - r := m.getOperand_Reg(m.c.ValueDefinition(x)) - return addend{r.reg(), 0, uint8(amountDef.Instr.ConstantVal())} - } - r := m.getOperand_Reg(m.c.ValueDefinition(x)) - return addend{r.reg(), 0, 0} - } - panic("BUG: invalid opcode") -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go deleted file mode 100644 index 7c27c92af..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go +++ /dev/null @@ -1,3729 +0,0 @@ -package amd64 - -import ( - "context" - "encoding/binary" - "fmt" - "math" - "strings" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" - "github.com/tetratelabs/wazero/internal/platform" -) - -// NewBackend returns a new backend for arm64. 
-func NewBackend() backend.Machine { - m := &machine{ - cpuFeatures: platform.CpuFeatures, - regAlloc: regalloc.NewAllocator[*instruction, *labelPosition, *regAllocFn](regInfo), - spillSlots: map[regalloc.VRegID]int64{}, - amodePool: wazevoapi.NewPool[amode](nil), - labelPositionPool: wazevoapi.NewIDedPool[labelPosition](resetLabelPosition), - instrPool: wazevoapi.NewPool[instruction](resetInstruction), - constSwizzleMaskConstIndex: -1, - constSqmulRoundSatIndex: -1, - constI8x16SHLMaskTableIndex: -1, - constI8x16LogicalSHRMaskTableIndex: -1, - constF64x2CvtFromIMaskIndex: -1, - constTwop52Index: -1, - constI32sMaxOnF64x2Index: -1, - constI32uMaxOnF64x2Index: -1, - constAllOnesI8x16Index: -1, - constAllOnesI16x8Index: -1, - constExtAddPairwiseI16x8uMask1Index: -1, - constExtAddPairwiseI16x8uMask2Index: -1, - } - m.regAllocFn.m = m - return m -} - -type ( - // machine implements backend.Machine for amd64. - machine struct { - c backend.Compiler - stackBoundsCheckDisabled bool - - instrPool wazevoapi.Pool[instruction] - amodePool wazevoapi.Pool[amode] - - cpuFeatures platform.CpuFeatureFlags - - regAlloc regalloc.Allocator[*instruction, *labelPosition, *regAllocFn] - regAllocFn regAllocFn - regAllocStarted bool - - // labelPositionPool is the pool of labelPosition. The id is the label where - // if the label is less than the maxSSABlockID, it's the ssa.BasicBlockID. - labelPositionPool wazevoapi.IDedPool[labelPosition] - // nextLabel is the next label to be allocated. The first free label comes after maxSSABlockID - // so that we can have an identical label for the SSA block ID, which is useful for debugging. - nextLabel label - // rootInstr is the first instruction of the function. - rootInstr *instruction - // currentLabelPos is the currently-compiled ssa.BasicBlock's labelPosition. - currentLabelPos *labelPosition - // orderedSSABlockLabelPos is the ordered list of labelPosition in the generated code for each ssa.BasicBlock. - orderedSSABlockLabelPos []*labelPosition - // returnLabelPos is the labelPosition for the return block. - returnLabelPos labelPosition - // perBlockHead and perBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock. - perBlockHead, perBlockEnd *instruction - // pendingInstructions are the instructions which are not yet emitted into the instruction list. - pendingInstructions []*instruction - // maxSSABlockID is the maximum ssa.BasicBlockID in the current function. - maxSSABlockID label - - spillSlotSize int64 - spillSlots map[regalloc.VRegID]int64 - currentABI *backend.FunctionABI - clobberedRegs []regalloc.VReg - - maxRequiredStackSizeForCalls int64 - - labelResolutionPends []labelResolutionPend - - // jmpTableTargets holds the labels of the jump table targets. - jmpTableTargets [][]uint32 - // jmpTableTargetNext is the index to the jmpTableTargets slice to be used for the next jump table. - jmpTableTargetsNext int - consts []_const - - constSwizzleMaskConstIndex, constSqmulRoundSatIndex, - constI8x16SHLMaskTableIndex, constI8x16LogicalSHRMaskTableIndex, - constF64x2CvtFromIMaskIndex, constTwop52Index, - constI32sMaxOnF64x2Index, constI32uMaxOnF64x2Index, - constAllOnesI8x16Index, constAllOnesI16x8Index, - constExtAddPairwiseI16x8uMask1Index, constExtAddPairwiseI16x8uMask2Index int - } - - _const struct { - lo, hi uint64 - _var []byte - label label - labelPos *labelPosition - } - - labelResolutionPend struct { - instr *instruction - instrOffset int64 - // imm32Offset is the offset of the last 4 bytes of the instruction. 
- imm32Offset int64 - } -) - -type ( - // label represents a position in the generated code which is either - // a real instruction or the constant InstructionPool (e.g. jump tables). - // - // This is exactly the same as the traditional "label" in assembly code. - label uint32 - - // labelPosition represents the regions of the generated code which the label represents. - // This implements regalloc.Block. - labelPosition struct { - // sb is not nil if this corresponds to a ssa.BasicBlock. - sb ssa.BasicBlock - // cur is used to walk through the instructions in the block during the register allocation. - cur, - // begin and end are the first and last instructions of the block. - begin, end *instruction - // binaryOffset is the offset in the binary where the label is located. - binaryOffset int64 - } -) - -// String implements backend.Machine. -func (l label) String() string { - return fmt.Sprintf("L%d", l) -} - -func resetLabelPosition(l *labelPosition) { - *l = labelPosition{} -} - -const labelReturn = math.MaxUint32 - -func ssaBlockLabel(sb ssa.BasicBlock) label { - if sb.ReturnBlock() { - return labelReturn - } - return label(sb.ID()) -} - -// getOrAllocateSSABlockLabelPosition returns the labelPosition for the given basic block. -func (m *machine) getOrAllocateSSABlockLabelPosition(sb ssa.BasicBlock) *labelPosition { - if sb.ReturnBlock() { - m.returnLabelPos.sb = sb - return &m.returnLabelPos - } - - l := ssaBlockLabel(sb) - pos := m.labelPositionPool.GetOrAllocate(int(l)) - pos.sb = sb - return pos -} - -func (m *machine) getOrAllocateConstLabel(i *int, _var []byte) label { - index := *i - if index == -1 { - l, pos := m.allocateLabel() - index = len(m.consts) - m.consts = append(m.consts, _const{ - _var: _var, - label: l, - labelPos: pos, - }) - *i = index - } - return m.consts[index].label -} - -// Reset implements backend.Machine. -func (m *machine) Reset() { - m.consts = m.consts[:0] - m.clobberedRegs = m.clobberedRegs[:0] - for key := range m.spillSlots { - m.clobberedRegs = append(m.clobberedRegs, regalloc.VReg(key)) - } - for _, key := range m.clobberedRegs { - delete(m.spillSlots, regalloc.VRegID(key)) - } - - m.stackBoundsCheckDisabled = false - m.regAlloc.Reset() - m.labelPositionPool.Reset() - m.instrPool.Reset() - m.regAllocStarted = false - m.clobberedRegs = m.clobberedRegs[:0] - - m.spillSlotSize = 0 - m.maxRequiredStackSizeForCalls = 0 - m.perBlockHead, m.perBlockEnd, m.rootInstr = nil, nil, nil - m.pendingInstructions = m.pendingInstructions[:0] - m.orderedSSABlockLabelPos = m.orderedSSABlockLabelPos[:0] - - m.amodePool.Reset() - m.jmpTableTargetsNext = 0 - m.constSwizzleMaskConstIndex = -1 - m.constSqmulRoundSatIndex = -1 - m.constI8x16SHLMaskTableIndex = -1 - m.constI8x16LogicalSHRMaskTableIndex = -1 - m.constF64x2CvtFromIMaskIndex = -1 - m.constTwop52Index = -1 - m.constI32sMaxOnF64x2Index = -1 - m.constI32uMaxOnF64x2Index = -1 - m.constAllOnesI8x16Index = -1 - m.constAllOnesI16x8Index = -1 - m.constExtAddPairwiseI16x8uMask1Index = -1 - m.constExtAddPairwiseI16x8uMask2Index = -1 -} - -// StartLoweringFunction implements backend.Machine StartLoweringFunction. -func (m *machine) StartLoweringFunction(maxBlockID ssa.BasicBlockID) { - m.maxSSABlockID = label(maxBlockID) - m.nextLabel = label(maxBlockID) + 1 -} - -// LinkAdjacentBlocks implements backend.Machine. 
-func (m *machine) LinkAdjacentBlocks(prev, next ssa.BasicBlock) { - prevPos, nextPos := m.getOrAllocateSSABlockLabelPosition(prev), m.getOrAllocateSSABlockLabelPosition(next) - prevPos.end.next = nextPos.begin -} - -// StartBlock implements backend.Machine. -func (m *machine) StartBlock(blk ssa.BasicBlock) { - m.currentLabelPos = m.getOrAllocateSSABlockLabelPosition(blk) - labelPos := m.currentLabelPos - end := m.allocateNop() - m.perBlockHead, m.perBlockEnd = end, end - labelPos.begin, labelPos.end = end, end - m.orderedSSABlockLabelPos = append(m.orderedSSABlockLabelPos, labelPos) -} - -// EndBlock implements ExecutableContext. -func (m *machine) EndBlock() { - // Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions. - m.insertAtPerBlockHead(m.allocateNop()) - - m.currentLabelPos.begin = m.perBlockHead - - if m.currentLabelPos.sb.EntryBlock() { - m.rootInstr = m.perBlockHead - } -} - -func (m *machine) insertAtPerBlockHead(i *instruction) { - if m.perBlockHead == nil { - m.perBlockHead = i - m.perBlockEnd = i - return - } - - i.next = m.perBlockHead - m.perBlockHead.prev = i - m.perBlockHead = i -} - -// FlushPendingInstructions implements backend.Machine. -func (m *machine) FlushPendingInstructions() { - l := len(m.pendingInstructions) - if l == 0 { - return - } - for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order. - m.insertAtPerBlockHead(m.pendingInstructions[i]) - } - m.pendingInstructions = m.pendingInstructions[:0] -} - -// DisableStackCheck implements backend.Machine. -func (m *machine) DisableStackCheck() { m.stackBoundsCheckDisabled = true } - -// SetCompiler implements backend.Machine. -func (m *machine) SetCompiler(c backend.Compiler) { - m.c = c - m.regAllocFn.ssaB = c.SSABuilder() -} - -// SetCurrentABI implements backend.Machine. -func (m *machine) SetCurrentABI(abi *backend.FunctionABI) { m.currentABI = abi } - -// RegAlloc implements backend.Machine. -func (m *machine) RegAlloc() { - rf := m.regAllocFn - m.regAllocStarted = true - m.regAlloc.DoAllocation(&rf) - // Now that we know the final spill slot size, we must align spillSlotSize to 16 bytes. - m.spillSlotSize = (m.spillSlotSize + 15) &^ 15 -} - -// InsertReturn implements backend.Machine. -func (m *machine) InsertReturn() { - i := m.allocateInstr().asRet() - m.insert(i) -} - -// LowerSingleBranch implements backend.Machine. 
-func (m *machine) LowerSingleBranch(b *ssa.Instruction) { - switch b.Opcode() { - case ssa.OpcodeJump: - _, _, targetBlkID := b.BranchData() - if b.IsFallthroughJump() { - return - } - jmp := m.allocateInstr() - target := ssaBlockLabel(m.c.SSABuilder().BasicBlock(targetBlkID)) - if target == labelReturn { - jmp.asRet() - } else { - jmp.asJmp(newOperandLabel(target)) - } - m.insert(jmp) - case ssa.OpcodeBrTable: - index, targetBlkIDs := b.BrTableData() - m.lowerBrTable(index, targetBlkIDs) - default: - panic("BUG: unexpected branch opcode" + b.Opcode().String()) - } -} - -func (m *machine) addJmpTableTarget(targets ssa.Values) (index int) { - if m.jmpTableTargetsNext == len(m.jmpTableTargets) { - m.jmpTableTargets = append(m.jmpTableTargets, make([]uint32, 0, len(targets.View()))) - } - - index = m.jmpTableTargetsNext - m.jmpTableTargetsNext++ - m.jmpTableTargets[index] = m.jmpTableTargets[index][:0] - for _, targetBlockID := range targets.View() { - target := m.c.SSABuilder().BasicBlock(ssa.BasicBlockID(targetBlockID)) - m.jmpTableTargets[index] = append(m.jmpTableTargets[index], uint32(ssaBlockLabel(target))) - } - return -} - -var condBranchMatches = [...]ssa.Opcode{ssa.OpcodeIcmp, ssa.OpcodeFcmp} - -func (m *machine) lowerBrTable(index ssa.Value, targets ssa.Values) { - _v := m.getOperand_Reg(m.c.ValueDefinition(index)) - v := m.copyToTmp(_v.reg()) - - targetCount := len(targets.View()) - - // First, we need to do the bounds check. - maxIndex := m.c.AllocateVReg(ssa.TypeI32) - m.lowerIconst(maxIndex, uint64(targetCount-1), false) - cmp := m.allocateInstr().asCmpRmiR(true, newOperandReg(maxIndex), v, false) - m.insert(cmp) - - // Then do the conditional move maxIndex to v if v > maxIndex. - cmov := m.allocateInstr().asCmove(condNB, newOperandReg(maxIndex), v, false) - m.insert(cmov) - - // Now that v has the correct index. Load the address of the jump table into the addr. - addr := m.c.AllocateVReg(ssa.TypeI64) - leaJmpTableAddr := m.allocateInstr() - m.insert(leaJmpTableAddr) - - // Then add the target's offset into jmpTableAddr. - loadTargetOffsetFromJmpTable := m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, - // Shift by 3 because each entry is 8 bytes. - newOperandMem(m.newAmodeRegRegShift(0, addr, v, 3)), addr, true) - m.insert(loadTargetOffsetFromJmpTable) - - // Now ready to jump. - jmp := m.allocateInstr().asJmp(newOperandReg(addr)) - m.insert(jmp) - - jmpTableBegin, jmpTableBeginLabel := m.allocateBrTarget() - m.insert(jmpTableBegin) - leaJmpTableAddr.asLEA(newOperandLabel(jmpTableBeginLabel), addr) - - jmpTable := m.allocateInstr() - targetSliceIndex := m.addJmpTableTarget(targets) - jmpTable.asJmpTableSequence(targetSliceIndex, targetCount) - m.insert(jmpTable) -} - -// LowerConditionalBranch implements backend.Machine. -func (m *machine) LowerConditionalBranch(b *ssa.Instruction) { - cval, args, targetBlkID := b.BranchData() - if len(args) > 0 { - panic(fmt.Sprintf( - "conditional branch shouldn't have args; likely a bug in critical edge splitting: from %s to %s", - m.currentLabelPos.sb, - targetBlkID, - )) - } - - target := ssaBlockLabel(m.c.SSABuilder().BasicBlock(targetBlkID)) - cvalDef := m.c.ValueDefinition(cval) - - switch m.c.MatchInstrOneOf(cvalDef, condBranchMatches[:]) { - case ssa.OpcodeIcmp: - cvalInstr := cvalDef.Instr - x, y, c := cvalInstr.IcmpData() - - cc := condFromSSAIntCmpCond(c) - if b.Opcode() == ssa.OpcodeBrz { - cc = cc.invert() - } - - // First, perform the comparison and set the flag. 
- xd, yd := m.c.ValueDefinition(x), m.c.ValueDefinition(y) - if !m.tryLowerBandToFlag(xd, yd) { - m.lowerIcmpToFlag(xd, yd, x.Type() == ssa.TypeI64) - } - - // Then perform the conditional branch. - m.insert(m.allocateInstr().asJmpIf(cc, newOperandLabel(target))) - cvalDef.Instr.MarkLowered() - case ssa.OpcodeFcmp: - cvalInstr := cvalDef.Instr - - f1, f2, and := m.lowerFcmpToFlags(cvalInstr) - isBrz := b.Opcode() == ssa.OpcodeBrz - if isBrz { - f1 = f1.invert() - } - if f2 == condInvalid { - m.insert(m.allocateInstr().asJmpIf(f1, newOperandLabel(target))) - } else { - if isBrz { - f2 = f2.invert() - and = !and - } - jmp1, jmp2 := m.allocateInstr(), m.allocateInstr() - m.insert(jmp1) - m.insert(jmp2) - notTaken, notTakenLabel := m.allocateBrTarget() - m.insert(notTaken) - if and { - jmp1.asJmpIf(f1.invert(), newOperandLabel(notTakenLabel)) - jmp2.asJmpIf(f2, newOperandLabel(target)) - } else { - jmp1.asJmpIf(f1, newOperandLabel(target)) - jmp2.asJmpIf(f2, newOperandLabel(target)) - } - } - - cvalDef.Instr.MarkLowered() - default: - v := m.getOperand_Reg(cvalDef) - - var cc cond - if b.Opcode() == ssa.OpcodeBrz { - cc = condZ - } else { - cc = condNZ - } - - // Perform test %v, %v to set the flag. - cmp := m.allocateInstr().asCmpRmiR(false, v, v.reg(), false) - m.insert(cmp) - m.insert(m.allocateInstr().asJmpIf(cc, newOperandLabel(target))) - } -} - -// LowerInstr implements backend.Machine. -func (m *machine) LowerInstr(instr *ssa.Instruction) { - if l := instr.SourceOffset(); l.Valid() { - info := m.allocateInstr().asEmitSourceOffsetInfo(l) - m.insert(info) - } - - switch op := instr.Opcode(); op { - case ssa.OpcodeBrz, ssa.OpcodeBrnz, ssa.OpcodeJump, ssa.OpcodeBrTable: - panic("BUG: branching instructions are handled by LowerBranches") - case ssa.OpcodeReturn: - panic("BUG: return must be handled by backend.Compiler") - case ssa.OpcodeIconst, ssa.OpcodeF32const, ssa.OpcodeF64const: // Constant instructions are inlined. 
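
An aside on the two-jump fcmp lowering a little earlier (in LowerConditionalBranch): UCOMISS/UCOMISD report an unordered comparison, i.e. a NaN operand, by setting ZF, PF and CF all to 1, so a predicate such as floating-point equality needs ZF=1 and PF=0 at the same time, which no single jcc expresses; that is why lowerFcmpToFlags can return a second condition plus an and/or flag. The program below only demonstrates the source-level semantics those flags must reproduce; it is not part of the backend:

package main

import (
	"fmt"
	"math"
)

func main() {
	nan := math.NaN()
	fmt.Println(nan == nan) // false: "equal" must reject unordered operands (ZF=1 alone is not enough)
	fmt.Println(nan != nan) // true:  "not equal" must accept them (not-zero OR parity)
}
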
- case ssa.OpcodeCall, ssa.OpcodeCallIndirect: - m.lowerCall(instr) - case ssa.OpcodeStore, ssa.OpcodeIstore8, ssa.OpcodeIstore16, ssa.OpcodeIstore32: - m.lowerStore(instr) - case ssa.OpcodeIadd: - m.lowerAluRmiROp(instr, aluRmiROpcodeAdd) - case ssa.OpcodeIsub: - m.lowerAluRmiROp(instr, aluRmiROpcodeSub) - case ssa.OpcodeImul: - m.lowerAluRmiROp(instr, aluRmiROpcodeMul) - case ssa.OpcodeSdiv, ssa.OpcodeUdiv, ssa.OpcodeSrem, ssa.OpcodeUrem: - isDiv := op == ssa.OpcodeSdiv || op == ssa.OpcodeUdiv - isSigned := op == ssa.OpcodeSdiv || op == ssa.OpcodeSrem - m.lowerIDivRem(instr, isDiv, isSigned) - case ssa.OpcodeBand: - m.lowerAluRmiROp(instr, aluRmiROpcodeAnd) - case ssa.OpcodeBor: - m.lowerAluRmiROp(instr, aluRmiROpcodeOr) - case ssa.OpcodeBxor: - m.lowerAluRmiROp(instr, aluRmiROpcodeXor) - case ssa.OpcodeIshl: - m.lowerShiftR(instr, shiftROpShiftLeft) - case ssa.OpcodeSshr: - m.lowerShiftR(instr, shiftROpShiftRightArithmetic) - case ssa.OpcodeUshr: - m.lowerShiftR(instr, shiftROpShiftRightLogical) - case ssa.OpcodeRotl: - m.lowerShiftR(instr, shiftROpRotateLeft) - case ssa.OpcodeRotr: - m.lowerShiftR(instr, shiftROpRotateRight) - case ssa.OpcodeClz: - m.lowerClz(instr) - case ssa.OpcodeCtz: - m.lowerCtz(instr) - case ssa.OpcodePopcnt: - m.lowerUnaryRmR(instr, unaryRmROpcodePopcnt) - case ssa.OpcodeFadd, ssa.OpcodeFsub, ssa.OpcodeFmul, ssa.OpcodeFdiv: - m.lowerXmmRmR(instr) - case ssa.OpcodeFabs: - m.lowerFabsFneg(instr) - case ssa.OpcodeFneg: - m.lowerFabsFneg(instr) - case ssa.OpcodeCeil: - m.lowerRound(instr, roundingModeUp) - case ssa.OpcodeFloor: - m.lowerRound(instr, roundingModeDown) - case ssa.OpcodeTrunc: - m.lowerRound(instr, roundingModeZero) - case ssa.OpcodeNearest: - m.lowerRound(instr, roundingModeNearest) - case ssa.OpcodeFmin, ssa.OpcodeFmax: - m.lowerFminFmax(instr) - case ssa.OpcodeFcopysign: - m.lowerFcopysign(instr) - case ssa.OpcodeBitcast: - m.lowerBitcast(instr) - case ssa.OpcodeSqrt: - m.lowerSqrt(instr) - case ssa.OpcodeFpromote: - v := instr.Arg() - rn := m.getOperand_Reg(m.c.ValueDefinition(v)) - rd := m.c.VRegOf(instr.Return()) - cnt := m.allocateInstr() - cnt.asXmmUnaryRmR(sseOpcodeCvtss2sd, rn, rd) - m.insert(cnt) - case ssa.OpcodeFdemote: - v := instr.Arg() - rn := m.getOperand_Reg(m.c.ValueDefinition(v)) - rd := m.c.VRegOf(instr.Return()) - cnt := m.allocateInstr() - cnt.asXmmUnaryRmR(sseOpcodeCvtsd2ss, rn, rd) - m.insert(cnt) - case ssa.OpcodeFcvtToSint, ssa.OpcodeFcvtToSintSat: - x, ctx := instr.Arg2() - rn := m.getOperand_Reg(m.c.ValueDefinition(x)) - rd := m.c.VRegOf(instr.Return()) - ctxVReg := m.c.VRegOf(ctx) - m.lowerFcvtToSint(ctxVReg, rn.reg(), rd, x.Type() == ssa.TypeF64, - instr.Return().Type().Bits() == 64, op == ssa.OpcodeFcvtToSintSat) - case ssa.OpcodeFcvtToUint, ssa.OpcodeFcvtToUintSat: - x, ctx := instr.Arg2() - rn := m.getOperand_Reg(m.c.ValueDefinition(x)) - rd := m.c.VRegOf(instr.Return()) - ctxVReg := m.c.VRegOf(ctx) - m.lowerFcvtToUint(ctxVReg, rn.reg(), rd, x.Type() == ssa.TypeF64, - instr.Return().Type().Bits() == 64, op == ssa.OpcodeFcvtToUintSat) - case ssa.OpcodeFcvtFromSint: - x := instr.Arg() - rn := m.getOperand_Reg(m.c.ValueDefinition(x)) - rd := newOperandReg(m.c.VRegOf(instr.Return())) - m.lowerFcvtFromSint(rn, rd, - x.Type() == ssa.TypeI64, instr.Return().Type().Bits() == 64) - case ssa.OpcodeFcvtFromUint: - x := instr.Arg() - rn := m.getOperand_Reg(m.c.ValueDefinition(x)) - rd := newOperandReg(m.c.VRegOf(instr.Return())) - m.lowerFcvtFromUint(rn, rd, x.Type() == ssa.TypeI64, - instr.Return().Type().Bits() == 64) - case 
ssa.OpcodeVanyTrue: - m.lowerVanyTrue(instr) - case ssa.OpcodeVallTrue: - m.lowerVallTrue(instr) - case ssa.OpcodeVhighBits: - m.lowerVhighBits(instr) - case ssa.OpcodeVbnot: - m.lowerVbnot(instr) - case ssa.OpcodeVband: - x, y := instr.Arg2() - m.lowerVbBinOp(sseOpcodePand, x, y, instr.Return()) - case ssa.OpcodeVbor: - x, y := instr.Arg2() - m.lowerVbBinOp(sseOpcodePor, x, y, instr.Return()) - case ssa.OpcodeVbxor: - x, y := instr.Arg2() - m.lowerVbBinOp(sseOpcodePxor, x, y, instr.Return()) - case ssa.OpcodeVbandnot: - m.lowerVbandnot(instr, sseOpcodePandn) - case ssa.OpcodeVbitselect: - m.lowerVbitselect(instr) - case ssa.OpcodeVIadd: - x, y, lane := instr.Arg2WithLane() - var vecOp sseOpcode - switch lane { - case ssa.VecLaneI8x16: - vecOp = sseOpcodePaddb - case ssa.VecLaneI16x8: - vecOp = sseOpcodePaddw - case ssa.VecLaneI32x4: - vecOp = sseOpcodePaddd - case ssa.VecLaneI64x2: - vecOp = sseOpcodePaddq - } - m.lowerVbBinOp(vecOp, x, y, instr.Return()) - - case ssa.OpcodeVSaddSat: - x, y, lane := instr.Arg2WithLane() - var vecOp sseOpcode - switch lane { - case ssa.VecLaneI8x16: - vecOp = sseOpcodePaddsb - case ssa.VecLaneI16x8: - vecOp = sseOpcodePaddsw - } - m.lowerVbBinOp(vecOp, x, y, instr.Return()) - - case ssa.OpcodeVUaddSat: - x, y, lane := instr.Arg2WithLane() - var vecOp sseOpcode - switch lane { - case ssa.VecLaneI8x16: - vecOp = sseOpcodePaddusb - case ssa.VecLaneI16x8: - vecOp = sseOpcodePaddusw - } - m.lowerVbBinOp(vecOp, x, y, instr.Return()) - - case ssa.OpcodeVIsub: - x, y, lane := instr.Arg2WithLane() - var vecOp sseOpcode - switch lane { - case ssa.VecLaneI8x16: - vecOp = sseOpcodePsubb - case ssa.VecLaneI16x8: - vecOp = sseOpcodePsubw - case ssa.VecLaneI32x4: - vecOp = sseOpcodePsubd - case ssa.VecLaneI64x2: - vecOp = sseOpcodePsubq - } - m.lowerVbBinOp(vecOp, x, y, instr.Return()) - - case ssa.OpcodeVSsubSat: - x, y, lane := instr.Arg2WithLane() - var vecOp sseOpcode - switch lane { - case ssa.VecLaneI8x16: - vecOp = sseOpcodePsubsb - case ssa.VecLaneI16x8: - vecOp = sseOpcodePsubsw - } - m.lowerVbBinOp(vecOp, x, y, instr.Return()) - - case ssa.OpcodeVUsubSat: - x, y, lane := instr.Arg2WithLane() - var vecOp sseOpcode - switch lane { - case ssa.VecLaneI8x16: - vecOp = sseOpcodePsubusb - case ssa.VecLaneI16x8: - vecOp = sseOpcodePsubusw - } - m.lowerVbBinOp(vecOp, x, y, instr.Return()) - - case ssa.OpcodeVImul: - m.lowerVImul(instr) - case ssa.OpcodeVIneg: - x, lane := instr.ArgWithLane() - rn := m.getOperand_Reg(m.c.ValueDefinition(x)) - rd := m.c.VRegOf(instr.Return()) - var vecOp sseOpcode - switch lane { - case ssa.VecLaneI8x16: - vecOp = sseOpcodePsubb - case ssa.VecLaneI16x8: - vecOp = sseOpcodePsubw - case ssa.VecLaneI32x4: - vecOp = sseOpcodePsubd - case ssa.VecLaneI64x2: - vecOp = sseOpcodePsubq - default: - panic("BUG") - } - - tmp := m.c.AllocateVReg(ssa.TypeV128) - m.insert(m.allocateInstr().asZeros(tmp)) - - i := m.allocateInstr() - i.asXmmRmR(vecOp, rn, tmp) - m.insert(i) - - m.copyTo(tmp, rd) - case ssa.OpcodeVFadd: - x, y, lane := instr.Arg2WithLane() - var vecOp sseOpcode - switch lane { - case ssa.VecLaneF32x4: - vecOp = sseOpcodeAddps - case ssa.VecLaneF64x2: - vecOp = sseOpcodeAddpd - } - m.lowerVbBinOp(vecOp, x, y, instr.Return()) - - case ssa.OpcodeVFsub: - x, y, lane := instr.Arg2WithLane() - var vecOp sseOpcode - switch lane { - case ssa.VecLaneF32x4: - vecOp = sseOpcodeSubps - case ssa.VecLaneF64x2: - vecOp = sseOpcodeSubpd - } - m.lowerVbBinOp(vecOp, x, y, instr.Return()) - - case ssa.OpcodeVFdiv: - x, y, lane := instr.Arg2WithLane() - var 
vecOp sseOpcode - switch lane { - case ssa.VecLaneF32x4: - vecOp = sseOpcodeDivps - case ssa.VecLaneF64x2: - vecOp = sseOpcodeDivpd - } - m.lowerVbBinOp(vecOp, x, y, instr.Return()) - - case ssa.OpcodeVFmul: - x, y, lane := instr.Arg2WithLane() - var vecOp sseOpcode - switch lane { - case ssa.VecLaneF32x4: - vecOp = sseOpcodeMulps - case ssa.VecLaneF64x2: - vecOp = sseOpcodeMulpd - } - m.lowerVbBinOp(vecOp, x, y, instr.Return()) - - case ssa.OpcodeVFneg: - x, lane := instr.ArgWithLane() - rn := m.getOperand_Reg(m.c.ValueDefinition(x)) - rd := m.c.VRegOf(instr.Return()) - - tmp := m.c.AllocateVReg(ssa.TypeV128) - - var shiftOp, xorOp sseOpcode - var shiftAmt uint32 - switch lane { - case ssa.VecLaneF32x4: - shiftOp, shiftAmt, xorOp = sseOpcodePslld, 31, sseOpcodeXorps - case ssa.VecLaneF64x2: - shiftOp, shiftAmt, xorOp = sseOpcodePsllq, 63, sseOpcodeXorpd - } - - zero := m.allocateInstr() - zero.asZeros(tmp) - m.insert(zero) - - // Set all bits on tmp by CMPPD with arg=0 (== pseudo CMPEQPD instruction). - // See https://www.felixcloutier.com/x86/cmpps - // - // Note: if we do not clear all the bits ^ with XORPS, this might end up not setting ones on some lane - // if the lane is NaN. - cmp := m.allocateInstr() - cmp.asXmmRmRImm(sseOpcodeCmppd, uint8(cmpPredEQ_UQ), newOperandReg(tmp), tmp) - m.insert(cmp) - - // Do the left shift on each lane to set only the most significant bit in each. - i := m.allocateInstr() - i.asXmmRmiReg(shiftOp, newOperandImm32(shiftAmt), tmp) - m.insert(i) - - // Get the negated result by XOR on each lane with tmp. - i = m.allocateInstr() - i.asXmmRmR(xorOp, rn, tmp) - m.insert(i) - - m.copyTo(tmp, rd) - - case ssa.OpcodeVSqrt: - x, lane := instr.ArgWithLane() - rn := m.getOperand_Reg(m.c.ValueDefinition(x)) - rd := m.c.VRegOf(instr.Return()) - - var vecOp sseOpcode - switch lane { - case ssa.VecLaneF32x4: - vecOp = sseOpcodeSqrtps - case ssa.VecLaneF64x2: - vecOp = sseOpcodeSqrtpd - } - i := m.allocateInstr() - i.asXmmUnaryRmR(vecOp, rn, rd) - m.insert(i) - - case ssa.OpcodeVImin: - x, y, lane := instr.Arg2WithLane() - var vecOp sseOpcode - switch lane { - case ssa.VecLaneI8x16: - vecOp = sseOpcodePminsb - case ssa.VecLaneI16x8: - vecOp = sseOpcodePminsw - case ssa.VecLaneI32x4: - vecOp = sseOpcodePminsd - } - m.lowerVbBinOp(vecOp, x, y, instr.Return()) - - case ssa.OpcodeVUmin: - x, y, lane := instr.Arg2WithLane() - var vecOp sseOpcode - switch lane { - case ssa.VecLaneI8x16: - vecOp = sseOpcodePminub - case ssa.VecLaneI16x8: - vecOp = sseOpcodePminuw - case ssa.VecLaneI32x4: - vecOp = sseOpcodePminud - } - m.lowerVbBinOp(vecOp, x, y, instr.Return()) - - case ssa.OpcodeVImax: - x, y, lane := instr.Arg2WithLane() - var vecOp sseOpcode - switch lane { - case ssa.VecLaneI8x16: - vecOp = sseOpcodePmaxsb - case ssa.VecLaneI16x8: - vecOp = sseOpcodePmaxsw - case ssa.VecLaneI32x4: - vecOp = sseOpcodePmaxsd - } - m.lowerVbBinOp(vecOp, x, y, instr.Return()) - - case ssa.OpcodeVUmax: - x, y, lane := instr.Arg2WithLane() - var vecOp sseOpcode - switch lane { - case ssa.VecLaneI8x16: - vecOp = sseOpcodePmaxub - case ssa.VecLaneI16x8: - vecOp = sseOpcodePmaxuw - case ssa.VecLaneI32x4: - vecOp = sseOpcodePmaxud - } - m.lowerVbBinOp(vecOp, x, y, instr.Return()) - - case ssa.OpcodeVAvgRound: - x, y, lane := instr.Arg2WithLane() - var vecOp sseOpcode - switch lane { - case ssa.VecLaneI8x16: - vecOp = sseOpcodePavgb - case ssa.VecLaneI16x8: - vecOp = sseOpcodePavgw - } - m.lowerVbBinOp(vecOp, x, y, instr.Return()) - - case ssa.OpcodeVIcmp: - x, y, c, lane := instr.VIcmpData() - 
m.lowerVIcmp(x, y, c, instr.Return(), lane) - - case ssa.OpcodeVFcmp: - x, y, c, lane := instr.VFcmpData() - m.lowerVFcmp(x, y, c, instr.Return(), lane) - - case ssa.OpcodeExtractlane: - x, index, signed, lane := instr.ExtractlaneData() - m.lowerExtractLane(x, index, signed, instr.Return(), lane) - - case ssa.OpcodeInsertlane: - x, y, index, lane := instr.InsertlaneData() - m.lowerInsertLane(x, y, index, instr.Return(), lane) - - case ssa.OpcodeSwizzle: - x, y, _ := instr.Arg2WithLane() - m.lowerSwizzle(x, y, instr.Return()) - - case ssa.OpcodeShuffle: - x, y, lo, hi := instr.ShuffleData() - m.lowerShuffle(x, y, lo, hi, instr.Return()) - - case ssa.OpcodeSplat: - x, lane := instr.ArgWithLane() - m.lowerSplat(x, instr.Return(), lane) - - case ssa.OpcodeSqmulRoundSat: - x, y := instr.Arg2() - m.lowerSqmulRoundSat(x, y, instr.Return()) - - case ssa.OpcodeVZeroExtLoad: - ptr, offset, typ := instr.VZeroExtLoadData() - var sseOp sseOpcode - // Both movss and movsd clear the higher bits of the destination register up to 128 bits. - // https://www.felixcloutier.com/x86/movss - // https://www.felixcloutier.com/x86/movsd - if typ == ssa.TypeF32 { - sseOp = sseOpcodeMovss - } else { - sseOp = sseOpcodeMovsd - } - mem := m.lowerToAddressMode(ptr, offset) - dst := m.c.VRegOf(instr.Return()) - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOp, newOperandMem(mem), dst)) - - case ssa.OpcodeVMinPseudo: - x, y, lane := instr.Arg2WithLane() - var vecOp sseOpcode - switch lane { - case ssa.VecLaneF32x4: - vecOp = sseOpcodeMinps - case ssa.VecLaneF64x2: - vecOp = sseOpcodeMinpd - default: - panic("BUG: unexpected lane type") - } - m.lowerVbBinOpUnaligned(vecOp, y, x, instr.Return()) - - case ssa.OpcodeVMaxPseudo: - x, y, lane := instr.Arg2WithLane() - var vecOp sseOpcode - switch lane { - case ssa.VecLaneF32x4: - vecOp = sseOpcodeMaxps - case ssa.VecLaneF64x2: - vecOp = sseOpcodeMaxpd - default: - panic("BUG: unexpected lane type") - } - m.lowerVbBinOpUnaligned(vecOp, y, x, instr.Return()) - - case ssa.OpcodeVIshl: - x, y, lane := instr.Arg2WithLane() - m.lowerVIshl(x, y, instr.Return(), lane) - - case ssa.OpcodeVSshr: - x, y, lane := instr.Arg2WithLane() - m.lowerVSshr(x, y, instr.Return(), lane) - - case ssa.OpcodeVUshr: - x, y, lane := instr.Arg2WithLane() - m.lowerVUshr(x, y, instr.Return(), lane) - - case ssa.OpcodeVCeil: - x, lane := instr.ArgWithLane() - m.lowerVRound(x, instr.Return(), 0x2, lane == ssa.VecLaneF64x2) - - case ssa.OpcodeVFloor: - x, lane := instr.ArgWithLane() - m.lowerVRound(x, instr.Return(), 0x1, lane == ssa.VecLaneF64x2) - - case ssa.OpcodeVTrunc: - x, lane := instr.ArgWithLane() - m.lowerVRound(x, instr.Return(), 0x3, lane == ssa.VecLaneF64x2) - - case ssa.OpcodeVNearest: - x, lane := instr.ArgWithLane() - m.lowerVRound(x, instr.Return(), 0x0, lane == ssa.VecLaneF64x2) - - case ssa.OpcodeExtIaddPairwise: - x, lane, signed := instr.ExtIaddPairwiseData() - m.lowerExtIaddPairwise(x, instr.Return(), lane, signed) - - case ssa.OpcodeUwidenLow, ssa.OpcodeSwidenLow: - x, lane := instr.ArgWithLane() - m.lowerWidenLow(x, instr.Return(), lane, op == ssa.OpcodeSwidenLow) - - case ssa.OpcodeUwidenHigh, ssa.OpcodeSwidenHigh: - x, lane := instr.ArgWithLane() - m.lowerWidenHigh(x, instr.Return(), lane, op == ssa.OpcodeSwidenHigh) - - case ssa.OpcodeLoadSplat: - ptr, offset, lane := instr.LoadSplatData() - m.lowerLoadSplat(ptr, offset, instr.Return(), lane) - - case ssa.OpcodeVFcvtFromUint, ssa.OpcodeVFcvtFromSint: - x, lane := instr.ArgWithLane() - m.lowerVFcvtFromInt(x, instr.Return(), lane, op == 
ssa.OpcodeVFcvtFromSint) - - case ssa.OpcodeVFcvtToSintSat, ssa.OpcodeVFcvtToUintSat: - x, lane := instr.ArgWithLane() - m.lowerVFcvtToIntSat(x, instr.Return(), lane, op == ssa.OpcodeVFcvtToSintSat) - - case ssa.OpcodeSnarrow, ssa.OpcodeUnarrow: - x, y, lane := instr.Arg2WithLane() - m.lowerNarrow(x, y, instr.Return(), lane, op == ssa.OpcodeSnarrow) - - case ssa.OpcodeFvpromoteLow: - x := instr.Arg() - src := m.getOperand_Reg(m.c.ValueDefinition(x)) - dst := m.c.VRegOf(instr.Return()) - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtps2pd, src, dst)) - - case ssa.OpcodeFvdemote: - x := instr.Arg() - src := m.getOperand_Reg(m.c.ValueDefinition(x)) - dst := m.c.VRegOf(instr.Return()) - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtpd2ps, src, dst)) - - case ssa.OpcodeWideningPairwiseDotProductS: - x, y := instr.Arg2() - m.lowerWideningPairwiseDotProductS(x, y, instr.Return()) - - case ssa.OpcodeVIabs: - m.lowerVIabs(instr) - case ssa.OpcodeVIpopcnt: - m.lowerVIpopcnt(instr) - case ssa.OpcodeVFmin: - m.lowerVFmin(instr) - case ssa.OpcodeVFmax: - m.lowerVFmax(instr) - case ssa.OpcodeVFabs: - m.lowerVFabs(instr) - case ssa.OpcodeUndefined: - m.insert(m.allocateInstr().asUD2()) - case ssa.OpcodeExitWithCode: - execCtx, code := instr.ExitWithCodeData() - m.lowerExitWithCode(m.c.VRegOf(execCtx), code) - case ssa.OpcodeExitIfTrueWithCode: - execCtx, c, code := instr.ExitIfTrueWithCodeData() - m.lowerExitIfTrueWithCode(m.c.VRegOf(execCtx), c, code) - case ssa.OpcodeLoad: - ptr, offset, typ := instr.LoadData() - dst := m.c.VRegOf(instr.Return()) - m.lowerLoad(ptr, offset, typ, dst) - case ssa.OpcodeUload8, ssa.OpcodeUload16, ssa.OpcodeUload32, ssa.OpcodeSload8, ssa.OpcodeSload16, ssa.OpcodeSload32: - ptr, offset, _ := instr.LoadData() - ret := m.c.VRegOf(instr.Return()) - m.lowerExtLoad(op, ptr, offset, ret) - case ssa.OpcodeVconst: - result := m.c.VRegOf(instr.Return()) - lo, hi := instr.VconstData() - m.lowerVconst(result, lo, hi) - case ssa.OpcodeSExtend, ssa.OpcodeUExtend: - from, to, signed := instr.ExtendData() - m.lowerExtend(instr.Arg(), instr.Return(), from, to, signed) - case ssa.OpcodeIcmp: - m.lowerIcmp(instr) - case ssa.OpcodeFcmp: - m.lowerFcmp(instr) - case ssa.OpcodeSelect: - cval, x, y := instr.SelectData() - m.lowerSelect(x, y, cval, instr.Return()) - case ssa.OpcodeIreduce: - rn := m.getOperand_Mem_Reg(m.c.ValueDefinition(instr.Arg())) - retVal := instr.Return() - rd := m.c.VRegOf(retVal) - - if retVal.Type() != ssa.TypeI32 { - panic("TODO?: Ireduce to non-i32") - } - m.insert(m.allocateInstr().asMovzxRmR(extModeLQ, rn, rd)) - - case ssa.OpcodeAtomicLoad: - ptr := instr.Arg() - size := instr.AtomicTargetSize() - dst := m.c.VRegOf(instr.Return()) - - // At this point, the ptr is ensured to be aligned, so using a normal load is atomic. 
- // https://github.com/golang/go/blob/adead1a93f472affa97c494ef19f2f492ee6f34a/src/runtime/internal/atomic/atomic_amd64.go#L30 - mem := newOperandMem(m.lowerToAddressMode(ptr, 0)) - load := m.allocateInstr() - switch size { - case 8: - load.asMov64MR(mem, dst) - case 4: - load.asMovzxRmR(extModeLQ, mem, dst) - case 2: - load.asMovzxRmR(extModeWQ, mem, dst) - case 1: - load.asMovzxRmR(extModeBQ, mem, dst) - default: - panic("BUG") - } - m.insert(load) - - case ssa.OpcodeFence: - m.insert(m.allocateInstr().asMFence()) - - case ssa.OpcodeAtomicStore: - ptr, _val := instr.Arg2() - size := instr.AtomicTargetSize() - - val := m.getOperand_Reg(m.c.ValueDefinition(_val)) - // The content on the val register will be overwritten by xchg, so we need to copy it to a temporary register. - copied := m.copyToTmp(val.reg()) - - mem := newOperandMem(m.lowerToAddressMode(ptr, 0)) - store := m.allocateInstr().asXCHG(copied, mem, byte(size)) - m.insert(store) - - case ssa.OpcodeAtomicCas: - addr, exp, repl := instr.Arg3() - size := instr.AtomicTargetSize() - m.lowerAtomicCas(addr, exp, repl, size, instr.Return()) - - case ssa.OpcodeAtomicRmw: - addr, val := instr.Arg2() - atomicOp, size := instr.AtomicRmwData() - m.lowerAtomicRmw(atomicOp, addr, val, size, instr.Return()) - - default: - panic("TODO: lowering " + op.String()) - } -} - -func (m *machine) lowerAtomicRmw(op ssa.AtomicRmwOp, addr, val ssa.Value, size uint64, ret ssa.Value) { - mem := m.lowerToAddressMode(addr, 0) - _val := m.getOperand_Reg(m.c.ValueDefinition(val)) - - switch op { - case ssa.AtomicRmwOpAdd, ssa.AtomicRmwOpSub: - valCopied := m.copyToTmp(_val.reg()) - if op == ssa.AtomicRmwOpSub { - // Negate the value. - m.insert(m.allocateInstr().asNeg(newOperandReg(valCopied), true)) - } - m.insert(m.allocateInstr().asLockXAdd(valCopied, mem, byte(size))) - m.clearHigherBitsForAtomic(valCopied, size, ret.Type()) - m.copyTo(valCopied, m.c.VRegOf(ret)) - - case ssa.AtomicRmwOpAnd, ssa.AtomicRmwOpOr, ssa.AtomicRmwOpXor: - accumulator := raxVReg - // Reserve rax for the accumulator to make regalloc happy. - // Note: do this initialization before defining valCopied, because it might be the same register and - // if that happens, the unnecessary load/store will be performed inside the loop. - // This can be mitigated in any way once the register allocator is clever enough. - m.insert(m.allocateInstr().asDefineUninitializedReg(accumulator)) - - // Copy the value to a temporary register. - valCopied := m.copyToTmp(_val.reg()) - m.clearHigherBitsForAtomic(valCopied, size, ret.Type()) - - memOp := newOperandMem(mem) - tmp := m.c.AllocateVReg(ssa.TypeI64) - beginLoop, beginLoopLabel := m.allocateBrTarget() - { - m.insert(beginLoop) - // Reset the value on tmp by the original value. - m.copyTo(valCopied, tmp) - // Load the current value at the memory location into accumulator. - switch size { - case 1: - m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, memOp, accumulator)) - case 2: - m.insert(m.allocateInstr().asMovzxRmR(extModeWQ, memOp, accumulator)) - case 4: - m.insert(m.allocateInstr().asMovzxRmR(extModeLQ, memOp, accumulator)) - case 8: - m.insert(m.allocateInstr().asMov64MR(memOp, accumulator)) - default: - panic("BUG") - } - // Then perform the logical operation on the accumulator and the value on tmp. 
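(What this branch assembles is, in effect, the classic load / modify / LOCK CMPXCHG retry loop. A minimal, self-contained Go sketch of the same pattern, using sync/atomic purely for illustration; the helper name below is hypothetical and not part of this file.)

package main

import (
	"fmt"
	"sync/atomic"
)

// atomicAnd64 emulates an atomic AND on *p and returns the old value,
// using a compare-and-swap retry loop like the lowered sequence above.
func atomicAnd64(p *uint64, v uint64) (old uint64) {
	for {
		old = atomic.LoadUint64(p) // load the current value (the MOV from memory)
		if atomic.CompareAndSwapUint64(p, old, old&v) { // the LOCK CMPXCHG step
			return old
		}
		// CAS failed: another writer raced us, so retry with the fresh value.
	}
}

func main() {
	x := uint64(0b1111)
	fmt.Println(atomicAnd64(&x, 0b0101), x) // 15 5
}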
- switch op { - case ssa.AtomicRmwOpAnd: - m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, newOperandReg(accumulator), tmp, true)) - case ssa.AtomicRmwOpOr: - m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeOr, newOperandReg(accumulator), tmp, true)) - case ssa.AtomicRmwOpXor: - m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeXor, newOperandReg(accumulator), tmp, true)) - default: - panic("BUG") - } - // Finally, try compare-exchange the value at the memory location with the tmp. - m.insert(m.allocateInstr().asLockCmpXCHG(tmp, memOp.addressMode(), byte(size))) - // If it succeeds, ZF will be set, and we can break the loop. - m.insert(m.allocateInstr().asJmpIf(condNZ, newOperandLabel(beginLoopLabel))) - } - - // valCopied must be alive at the end of the loop. - m.insert(m.allocateInstr().asNopUseReg(valCopied)) - - // At this point, accumulator contains the result. - m.clearHigherBitsForAtomic(accumulator, size, ret.Type()) - m.copyTo(accumulator, m.c.VRegOf(ret)) - - case ssa.AtomicRmwOpXchg: - valCopied := m.copyToTmp(_val.reg()) - - m.insert(m.allocateInstr().asXCHG(valCopied, newOperandMem(mem), byte(size))) - m.clearHigherBitsForAtomic(valCopied, size, ret.Type()) - m.copyTo(valCopied, m.c.VRegOf(ret)) - - default: - panic("BUG") - } -} - -func (m *machine) lowerAtomicCas(addr, exp, repl ssa.Value, size uint64, ret ssa.Value) { - mem := m.lowerToAddressMode(addr, 0) - expOp := m.getOperand_Reg(m.c.ValueDefinition(exp)) - replOp := m.getOperand_Reg(m.c.ValueDefinition(repl)) - - accumulator := raxVReg - m.copyTo(expOp.reg(), accumulator) - m.insert(m.allocateInstr().asLockCmpXCHG(replOp.reg(), mem, byte(size))) - m.clearHigherBitsForAtomic(accumulator, size, ret.Type()) - m.copyTo(accumulator, m.c.VRegOf(ret)) -} - -func (m *machine) clearHigherBitsForAtomic(r regalloc.VReg, valSize uint64, resultType ssa.Type) { - switch resultType { - case ssa.TypeI32: - switch valSize { - case 1: - m.insert(m.allocateInstr().asMovzxRmR(extModeBL, newOperandReg(r), r)) - case 2: - m.insert(m.allocateInstr().asMovzxRmR(extModeWL, newOperandReg(r), r)) - } - case ssa.TypeI64: - switch valSize { - case 1: - m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, newOperandReg(r), r)) - case 2: - m.insert(m.allocateInstr().asMovzxRmR(extModeWQ, newOperandReg(r), r)) - case 4: - m.insert(m.allocateInstr().asMovzxRmR(extModeLQ, newOperandReg(r), r)) - } - } -} - -func (m *machine) lowerFcmp(instr *ssa.Instruction) { - f1, f2, and := m.lowerFcmpToFlags(instr) - rd := m.c.VRegOf(instr.Return()) - if f2 == condInvalid { - tmp := m.c.AllocateVReg(ssa.TypeI32) - m.insert(m.allocateInstr().asSetcc(f1, tmp)) - // On amd64, setcc only sets the first byte of the register, so we need to zero extend it to match - // the semantics of Icmp that sets either 0 or 1. 
- m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, newOperandReg(tmp), rd)) - } else { - tmp1, tmp2 := m.c.AllocateVReg(ssa.TypeI32), m.c.AllocateVReg(ssa.TypeI32) - m.insert(m.allocateInstr().asSetcc(f1, tmp1)) - m.insert(m.allocateInstr().asSetcc(f2, tmp2)) - var op aluRmiROpcode - if and { - op = aluRmiROpcodeAnd - } else { - op = aluRmiROpcodeOr - } - m.insert(m.allocateInstr().asAluRmiR(op, newOperandReg(tmp1), tmp2, false)) - m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, newOperandReg(tmp2), rd)) - } -} - -func (m *machine) lowerIcmp(instr *ssa.Instruction) { - x, y, c := instr.IcmpData() - m.lowerIcmpToFlag(m.c.ValueDefinition(x), m.c.ValueDefinition(y), x.Type() == ssa.TypeI64) - rd := m.c.VRegOf(instr.Return()) - tmp := m.c.AllocateVReg(ssa.TypeI32) - m.insert(m.allocateInstr().asSetcc(condFromSSAIntCmpCond(c), tmp)) - // On amd64, setcc only sets the first byte of the register, so we need to zero extend it to match - // the semantics of Icmp that sets either 0 or 1. - m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, newOperandReg(tmp), rd)) -} - -func (m *machine) lowerSelect(x, y, cval, ret ssa.Value) { - xo, yo := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)), m.getOperand_Reg(m.c.ValueDefinition(y)) - rd := m.c.VRegOf(ret) - - var cond cond - cvalDef := m.c.ValueDefinition(cval) - switch m.c.MatchInstrOneOf(cvalDef, condBranchMatches[:]) { - case ssa.OpcodeIcmp: - icmp := cvalDef.Instr - xc, yc, cc := icmp.IcmpData() - m.lowerIcmpToFlag(m.c.ValueDefinition(xc), m.c.ValueDefinition(yc), xc.Type() == ssa.TypeI64) - cond = condFromSSAIntCmpCond(cc) - icmp.Lowered() - default: // TODO: match ssa.OpcodeFcmp for optimization, but seems a bit complex. - cv := m.getOperand_Reg(cvalDef) - test := m.allocateInstr().asCmpRmiR(false, cv, cv.reg(), false) - m.insert(test) - cond = condNZ - } - - if typ := x.Type(); typ.IsInt() { - _64 := typ.Bits() == 64 - mov := m.allocateInstr() - tmp := m.c.AllocateVReg(typ) - switch yo.kind { - case operandKindReg: - mov.asMovRR(yo.reg(), tmp, _64) - case operandKindMem: - if _64 { - mov.asMov64MR(yo, tmp) - } else { - mov.asMovzxRmR(extModeLQ, yo, tmp) - } - default: - panic("BUG") - } - m.insert(mov) - cmov := m.allocateInstr().asCmove(cond, xo, tmp, _64) - m.insert(cmov) - m.insert(m.allocateInstr().asMovRR(tmp, rd, _64)) - } else { - mov := m.allocateInstr() - tmp := m.c.AllocateVReg(typ) - switch typ { - case ssa.TypeF32: - mov.asXmmUnaryRmR(sseOpcodeMovss, yo, tmp) - case ssa.TypeF64: - mov.asXmmUnaryRmR(sseOpcodeMovsd, yo, tmp) - case ssa.TypeV128: - mov.asXmmUnaryRmR(sseOpcodeMovdqu, yo, tmp) - default: - panic("BUG") - } - m.insert(mov) - - cmov := m.allocateInstr().asXmmCMov(cond, xo, tmp, typ.Size()) - m.insert(cmov) - - m.copyTo(tmp, rd) - } -} - -func (m *machine) lowerXmmCmovAfterRegAlloc(i *instruction) { - x := i.op1 - rd := i.op2.reg() - cond := cond(i.u1) - - jcc := m.allocateInstr() - m.insert(jcc) - - mov := m.allocateInstr() - switch i.u2 { - case 4: - mov.asXmmUnaryRmR(sseOpcodeMovss, x, rd) - case 8: - mov.asXmmUnaryRmR(sseOpcodeMovsd, x, rd) - case 16: - mov.asXmmUnaryRmR(sseOpcodeMovdqu, x, rd) - default: - panic("BUG") - } - m.insert(mov) - - nop, end := m.allocateBrTarget() - m.insert(nop) - jcc.asJmpIf(cond.invert(), newOperandLabel(end)) -} - -func (m *machine) lowerExtend(_arg, ret ssa.Value, from, to byte, signed bool) { - rd0 := m.c.VRegOf(ret) - arg := m.getOperand_Mem_Reg(m.c.ValueDefinition(_arg)) - - rd := m.c.AllocateVReg(ret.Type()) - - ext := m.allocateInstr() - switch { - case from == 8 && to == 16 && signed: - 
ext.asMovsxRmR(extModeBQ, arg, rd) - case from == 8 && to == 16 && !signed: - ext.asMovzxRmR(extModeBL, arg, rd) - case from == 8 && to == 32 && signed: - ext.asMovsxRmR(extModeBL, arg, rd) - case from == 8 && to == 32 && !signed: - ext.asMovzxRmR(extModeBQ, arg, rd) - case from == 8 && to == 64 && signed: - ext.asMovsxRmR(extModeBQ, arg, rd) - case from == 8 && to == 64 && !signed: - ext.asMovzxRmR(extModeBQ, arg, rd) - case from == 16 && to == 32 && signed: - ext.asMovsxRmR(extModeWL, arg, rd) - case from == 16 && to == 32 && !signed: - ext.asMovzxRmR(extModeWL, arg, rd) - case from == 16 && to == 64 && signed: - ext.asMovsxRmR(extModeWQ, arg, rd) - case from == 16 && to == 64 && !signed: - ext.asMovzxRmR(extModeWQ, arg, rd) - case from == 32 && to == 64 && signed: - ext.asMovsxRmR(extModeLQ, arg, rd) - case from == 32 && to == 64 && !signed: - ext.asMovzxRmR(extModeLQ, arg, rd) - default: - panic(fmt.Sprintf("BUG: unhandled extend: from=%d, to=%d, signed=%t", from, to, signed)) - } - m.insert(ext) - - m.copyTo(rd, rd0) -} - -func (m *machine) lowerVconst(dst regalloc.VReg, lo, hi uint64) { - if lo == 0 && hi == 0 { - m.insert(m.allocateInstr().asZeros(dst)) - return - } - - load := m.allocateInstr() - l, pos := m.allocateLabel() - m.consts = append(m.consts, _const{label: l, labelPos: pos, lo: lo, hi: hi}) - load.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(l)), dst) - m.insert(load) -} - -func (m *machine) lowerCtz(instr *ssa.Instruction) { - if m.cpuFeatures.HasExtra(platform.CpuExtraFeatureAmd64ABM) { - m.lowerUnaryRmR(instr, unaryRmROpcodeTzcnt) - } else { - // On processors that do not support TZCNT, the BSF instruction is - // executed instead. The key difference between TZCNT and BSF - // instruction is that if source operand is zero, the content of - // destination operand is undefined. - // https://www.felixcloutier.com/x86/tzcnt.html - - x := instr.Arg() - if !x.Type().IsInt() { - panic("BUG?") - } - _64 := x.Type().Bits() == 64 - - xDef := m.c.ValueDefinition(x) - tmp := m.c.AllocateVReg(x.Type()) - rm := m.getOperand_Reg(xDef) - - // First, we have to check if the target is non-zero. - test := m.allocateInstr() - test.asCmpRmiR(false, rm, rm.reg(), _64) - m.insert(test) - - jmpNz := m.allocateInstr() - m.insert(jmpNz) - - // If the value is zero, we just push the const value. - m.lowerIconst(tmp, uint64(x.Type().Bits()), _64) - - // Now jump right after the non-zero case. - jmpAtEnd := m.allocateInstr() - m.insert(jmpAtEnd) - - // jmpNz target label is set here. - nop, nz := m.allocateBrTarget() - jmpNz.asJmpIf(condNZ, newOperandLabel(nz)) - m.insert(nop) - - // Emit the non-zero case. - bsr := m.allocateInstr() - bsr.asUnaryRmR(unaryRmROpcodeBsf, rm, tmp, _64) - m.insert(bsr) - - // jmpAtEnd target label is set here. - nopEnd, end := m.allocateBrTarget() - jmpAtEnd.asJmp(newOperandLabel(end)) - m.insert(nopEnd) - - m.copyTo(tmp, m.c.VRegOf(instr.Return())) - } -} - -func (m *machine) lowerClz(instr *ssa.Instruction) { - if m.cpuFeatures.HasExtra(platform.CpuExtraFeatureAmd64ABM) { - m.lowerUnaryRmR(instr, unaryRmROpcodeLzcnt) - } else { - // On processors that do not support LZCNT, we combine BSR (calculating - // most significant set bit) with XOR. This logic is described in - // "Replace Raw Assembly Code with Builtin Intrinsics" section in: - // https://developer.apple.com/documentation/apple-silicon/addressing-architectural-differences-in-your-macos-code. 
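(The identity this fallback relies on is clz(x) = (width-1) XOR bsr(x) for non-zero x; XOR against the all-ones value 31 or 63 is the same as subtracting from it. A small scalar sketch, with math/bits standing in for BSR, illustrative only.)

package main

import (
	"fmt"
	"math/bits"
)

// clz32ViaBSR mirrors the BSR+XOR fallback: for x != 0,
// bsr(x) == 31 - leadingZeros(x), so 31 ^ bsr(x) recovers the leading-zero count.
func clz32ViaBSR(x uint32) uint32 {
	if x == 0 {
		return 32 // the lowered code branches to load this constant directly
	}
	bsr := uint32(31 - bits.LeadingZeros32(x)) // index of the most significant set bit
	return 31 ^ bsr
}

func main() {
	for _, v := range []uint32{0, 1, 0x80000000, 0x00ffff00} {
		fmt.Println(v, clz32ViaBSR(v), bits.LeadingZeros32(v))
	}
}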
- - x := instr.Arg() - if !x.Type().IsInt() { - panic("BUG?") - } - _64 := x.Type().Bits() == 64 - - xDef := m.c.ValueDefinition(x) - rm := m.getOperand_Reg(xDef) - tmp := m.c.AllocateVReg(x.Type()) - - // First, we have to check if the rm is non-zero as BSR is undefined - // on zero. See https://www.felixcloutier.com/x86/bsr. - test := m.allocateInstr() - test.asCmpRmiR(false, rm, rm.reg(), _64) - m.insert(test) - - jmpNz := m.allocateInstr() - m.insert(jmpNz) - - // If the value is zero, we just push the const value. - m.lowerIconst(tmp, uint64(x.Type().Bits()), _64) - - // Now jump right after the non-zero case. - jmpAtEnd := m.allocateInstr() - m.insert(jmpAtEnd) - - // jmpNz target label is set here. - nop, nz := m.allocateBrTarget() - jmpNz.asJmpIf(condNZ, newOperandLabel(nz)) - m.insert(nop) - - // Emit the non-zero case. - bsr := m.allocateInstr() - bsr.asUnaryRmR(unaryRmROpcodeBsr, rm, tmp, _64) - m.insert(bsr) - - // Now we XOR the value with the bit length minus one. - xor := m.allocateInstr() - xor.asAluRmiR(aluRmiROpcodeXor, newOperandImm32(uint32(x.Type().Bits()-1)), tmp, _64) - m.insert(xor) - - // jmpAtEnd target label is set here. - nopEnd, end := m.allocateBrTarget() - jmpAtEnd.asJmp(newOperandLabel(end)) - m.insert(nopEnd) - - m.copyTo(tmp, m.c.VRegOf(instr.Return())) - } -} - -func (m *machine) lowerUnaryRmR(si *ssa.Instruction, op unaryRmROpcode) { - x := si.Arg() - if !x.Type().IsInt() { - panic("BUG?") - } - _64 := x.Type().Bits() == 64 - - xDef := m.c.ValueDefinition(x) - rm := m.getOperand_Mem_Reg(xDef) - rd := m.c.VRegOf(si.Return()) - - instr := m.allocateInstr() - instr.asUnaryRmR(op, rm, rd, _64) - m.insert(instr) -} - -func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, dst regalloc.VReg) { - mem := newOperandMem(m.lowerToAddressMode(ptr, offset)) - load := m.allocateInstr() - switch typ { - case ssa.TypeI32: - load.asMovzxRmR(extModeLQ, mem, dst) - case ssa.TypeI64: - load.asMov64MR(mem, dst) - case ssa.TypeF32: - load.asXmmUnaryRmR(sseOpcodeMovss, mem, dst) - case ssa.TypeF64: - load.asXmmUnaryRmR(sseOpcodeMovsd, mem, dst) - case ssa.TypeV128: - load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, dst) - default: - panic("BUG") - } - m.insert(load) -} - -func (m *machine) lowerExtLoad(op ssa.Opcode, ptr ssa.Value, offset uint32, dst regalloc.VReg) { - mem := newOperandMem(m.lowerToAddressMode(ptr, offset)) - load := m.allocateInstr() - switch op { - case ssa.OpcodeUload8: - load.asMovzxRmR(extModeBQ, mem, dst) - case ssa.OpcodeUload16: - load.asMovzxRmR(extModeWQ, mem, dst) - case ssa.OpcodeUload32: - load.asMovzxRmR(extModeLQ, mem, dst) - case ssa.OpcodeSload8: - load.asMovsxRmR(extModeBQ, mem, dst) - case ssa.OpcodeSload16: - load.asMovsxRmR(extModeWQ, mem, dst) - case ssa.OpcodeSload32: - load.asMovsxRmR(extModeLQ, mem, dst) - default: - panic("BUG") - } - m.insert(load) -} - -func (m *machine) lowerExitIfTrueWithCode(execCtx regalloc.VReg, cond ssa.Value, code wazevoapi.ExitCode) { - condDef := m.c.ValueDefinition(cond) - if !m.c.MatchInstr(condDef, ssa.OpcodeIcmp) { - panic("TODO: ExitIfTrue must come after Icmp at the moment: " + condDef.Instr.Opcode().String()) - } - cvalInstr := condDef.Instr - cvalInstr.MarkLowered() - - // We need to copy the execution context to a temp register, because if it's spilled, - // it might end up being reloaded inside the exiting branch. 
- execCtxTmp := m.copyToTmp(execCtx) - - x, y, c := cvalInstr.IcmpData() - xx, yy := m.c.ValueDefinition(x), m.c.ValueDefinition(y) - if !m.tryLowerBandToFlag(xx, yy) { - m.lowerIcmpToFlag(xx, yy, x.Type() == ssa.TypeI64) - } - - jmpIf := m.allocateInstr() - m.insert(jmpIf) - l := m.lowerExitWithCode(execCtxTmp, code) - jmpIf.asJmpIf(condFromSSAIntCmpCond(c).invert(), newOperandLabel(l)) -} - -func (m *machine) tryLowerBandToFlag(x, y backend.SSAValueDefinition) (ok bool) { - var target backend.SSAValueDefinition - var got bool - if x.IsFromInstr() && x.Instr.Constant() && x.Instr.ConstantVal() == 0 { - if m.c.MatchInstr(y, ssa.OpcodeBand) { - target = y - got = true - } - } - - if y.IsFromInstr() && y.Instr.Constant() && y.Instr.ConstantVal() == 0 { - if m.c.MatchInstr(x, ssa.OpcodeBand) { - target = x - got = true - } - } - - if !got { - return false - } - - bandInstr := target.Instr - bandX, bandY := bandInstr.Arg2() - - xx := m.getOperand_Reg(m.c.ValueDefinition(bandX)) - yy := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(bandY)) - test := m.allocateInstr().asCmpRmiR(false, yy, xx.reg(), bandX.Type() == ssa.TypeI64) - m.insert(test) - bandInstr.MarkLowered() - return true -} - -func (m *machine) allocateExitInstructions(execCtx, exitCodeReg regalloc.VReg) (saveRsp, saveRbp, setExitCode *instruction) { - saveRsp = m.allocateInstr().asMovRM( - rspVReg, - newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.U32(), execCtx)), - 8, - ) - - saveRbp = m.allocateInstr().asMovRM( - rbpVReg, - newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetFramePointerBeforeGoCall.U32(), execCtx)), - 8, - ) - setExitCode = m.allocateInstr().asMovRM( - exitCodeReg, - newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetExitCodeOffset.U32(), execCtx)), - 4, - ) - return -} - -func (m *machine) lowerExitWithCode(execCtx regalloc.VReg, code wazevoapi.ExitCode) (afterLabel label) { - exitCodeReg := rbpVReg - saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtx, exitCodeReg) - - // Set save RSP, RBP, and write exit code. - m.insert(saveRsp) - m.insert(saveRbp) - m.lowerIconst(exitCodeReg, uint64(code), false) - m.insert(setExitCode) - - ripReg := rbpVReg - - // Next is to save the current address for stack unwinding. - nop, currentAddrLabel := m.allocateBrTarget() - m.insert(nop) - readRip := m.allocateInstr().asLEA(newOperandLabel(currentAddrLabel), ripReg) - m.insert(readRip) - saveRip := m.allocateInstr().asMovRM( - ripReg, - newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetGoCallReturnAddress.U32(), execCtx)), - 8, - ) - m.insert(saveRip) - - // Finally exit. - exitSq := m.allocateExitSeq(execCtx) - m.insert(exitSq) - - // Return the label for continuation. - continuation, afterLabel := m.allocateBrTarget() - m.insert(continuation) - return afterLabel -} - -func (m *machine) lowerAluRmiROp(si *ssa.Instruction, op aluRmiROpcode) { - x, y := si.Arg2() - if !x.Type().IsInt() { - panic("BUG?") - } - - _64 := x.Type().Bits() == 64 - - xDef, yDef := m.c.ValueDefinition(x), m.c.ValueDefinition(y) - - // TODO: commutative args can be swapped if one of them is an immediate. - rn := m.getOperand_Reg(xDef) - rm := m.getOperand_Mem_Imm32_Reg(yDef) - rd := m.c.VRegOf(si.Return()) - - // rn is being overwritten, so we first copy its value to a temp register, - // in case it is referenced again later. 
- tmp := m.copyToTmp(rn.reg()) - - alu := m.allocateInstr() - alu.asAluRmiR(op, rm, tmp, _64) - m.insert(alu) - - // tmp now contains the result, we copy it to the dest register. - m.copyTo(tmp, rd) -} - -func (m *machine) lowerShiftR(si *ssa.Instruction, op shiftROp) { - x, amt := si.Arg2() - if !x.Type().IsInt() { - panic("BUG?") - } - _64 := x.Type().Bits() == 64 - - xDef, amtDef := m.c.ValueDefinition(x), m.c.ValueDefinition(amt) - - opAmt := m.getOperand_Imm32_Reg(amtDef) - rx := m.getOperand_Reg(xDef) - rd := m.c.VRegOf(si.Return()) - - // rx is being overwritten, so we first copy its value to a temp register, - // in case it is referenced again later. - tmpDst := m.copyToTmp(rx.reg()) - - if opAmt.kind == operandKindReg { - // If opAmt is a register we must copy its value to rcx, - // because shiftR encoding mandates that the shift amount is in rcx. - m.copyTo(opAmt.reg(), rcxVReg) - - alu := m.allocateInstr() - alu.asShiftR(op, newOperandReg(rcxVReg), tmpDst, _64) - m.insert(alu) - - } else { - alu := m.allocateInstr() - alu.asShiftR(op, opAmt, tmpDst, _64) - m.insert(alu) - } - - // tmp now contains the result, we copy it to the dest register. - m.copyTo(tmpDst, rd) -} - -func (m *machine) lowerXmmRmR(instr *ssa.Instruction) { - x, y := instr.Arg2() - if !x.Type().IsFloat() { - panic("BUG?") - } - _64 := x.Type().Bits() == 64 - - var op sseOpcode - if _64 { - switch instr.Opcode() { - case ssa.OpcodeFadd: - op = sseOpcodeAddsd - case ssa.OpcodeFsub: - op = sseOpcodeSubsd - case ssa.OpcodeFmul: - op = sseOpcodeMulsd - case ssa.OpcodeFdiv: - op = sseOpcodeDivsd - default: - panic("BUG") - } - } else { - switch instr.Opcode() { - case ssa.OpcodeFadd: - op = sseOpcodeAddss - case ssa.OpcodeFsub: - op = sseOpcodeSubss - case ssa.OpcodeFmul: - op = sseOpcodeMulss - case ssa.OpcodeFdiv: - op = sseOpcodeDivss - default: - panic("BUG") - } - } - - xDef, yDef := m.c.ValueDefinition(x), m.c.ValueDefinition(y) - rn := m.getOperand_Reg(yDef) - rm := m.getOperand_Reg(xDef) - rd := m.c.VRegOf(instr.Return()) - - // rm is being overwritten, so we first copy its value to a temp register, - // in case it is referenced again later. 
- tmp := m.copyToTmp(rm.reg()) - - xmm := m.allocateInstr().asXmmRmR(op, rn, tmp) - m.insert(xmm) - - m.copyTo(tmp, rd) -} - -func (m *machine) lowerSqrt(instr *ssa.Instruction) { - x := instr.Arg() - if !x.Type().IsFloat() { - panic("BUG") - } - _64 := x.Type().Bits() == 64 - var op sseOpcode - if _64 { - op = sseOpcodeSqrtsd - } else { - op = sseOpcodeSqrtss - } - - xDef := m.c.ValueDefinition(x) - rm := m.getOperand_Mem_Reg(xDef) - rd := m.c.VRegOf(instr.Return()) - - xmm := m.allocateInstr().asXmmUnaryRmR(op, rm, rd) - m.insert(xmm) -} - -func (m *machine) lowerFabsFneg(instr *ssa.Instruction) { - x := instr.Arg() - if !x.Type().IsFloat() { - panic("BUG") - } - _64 := x.Type().Bits() == 64 - var op sseOpcode - var mask uint64 - if _64 { - switch instr.Opcode() { - case ssa.OpcodeFabs: - mask, op = 0x7fffffffffffffff, sseOpcodeAndpd - case ssa.OpcodeFneg: - mask, op = 0x8000000000000000, sseOpcodeXorpd - } - } else { - switch instr.Opcode() { - case ssa.OpcodeFabs: - mask, op = 0x7fffffff, sseOpcodeAndps - case ssa.OpcodeFneg: - mask, op = 0x80000000, sseOpcodeXorps - } - } - - tmp := m.c.AllocateVReg(x.Type()) - - xDef := m.c.ValueDefinition(x) - rm := m.getOperand_Reg(xDef) - rd := m.c.VRegOf(instr.Return()) - - m.lowerFconst(tmp, mask, _64) - - xmm := m.allocateInstr().asXmmRmR(op, rm, tmp) - m.insert(xmm) - - m.copyTo(tmp, rd) -} - -func (m *machine) lowerStore(si *ssa.Instruction) { - value, ptr, offset, storeSizeInBits := si.StoreData() - rm := m.getOperand_Reg(m.c.ValueDefinition(value)) - mem := newOperandMem(m.lowerToAddressMode(ptr, offset)) - - store := m.allocateInstr() - switch value.Type() { - case ssa.TypeI32: - store.asMovRM(rm.reg(), mem, storeSizeInBits/8) - case ssa.TypeI64: - store.asMovRM(rm.reg(), mem, storeSizeInBits/8) - case ssa.TypeF32: - store.asXmmMovRM(sseOpcodeMovss, rm.reg(), mem) - case ssa.TypeF64: - store.asXmmMovRM(sseOpcodeMovsd, rm.reg(), mem) - case ssa.TypeV128: - store.asXmmMovRM(sseOpcodeMovdqu, rm.reg(), mem) - default: - panic("BUG") - } - m.insert(store) -} - -func (m *machine) lowerCall(si *ssa.Instruction) { - isDirectCall := si.Opcode() == ssa.OpcodeCall - var indirectCalleePtr ssa.Value - var directCallee ssa.FuncRef - var sigID ssa.SignatureID - var args []ssa.Value - var isMemmove bool - if isDirectCall { - directCallee, sigID, args = si.CallData() - } else { - indirectCalleePtr, sigID, args, isMemmove = si.CallIndirectData() - } - calleeABI := m.c.GetFunctionABI(m.c.SSABuilder().ResolveSignature(sigID)) - - stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize()) - if m.maxRequiredStackSizeForCalls < stackSlotSize+16 { - m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // 16 == return address + RBP. - } - - // Note: See machine.SetupPrologue for the stack layout. - // The stack pointer decrease/increase will be inserted later in the compilation. - - for i, arg := range args { - reg := m.c.VRegOf(arg) - def := m.c.ValueDefinition(arg) - m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize) - } - - if isMemmove { - // Go's memmove *might* use all xmm0-xmm15, so we need to release them. 
- // https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#architecture-specifics - // https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/runtime/memmove_amd64.s#L271-L286 - for i := regalloc.RealReg(0); i < 16; i++ { - m.insert(m.allocateInstr().asDefineUninitializedReg(regInfo.RealRegToVReg[xmm0+i])) - } - } - - if isDirectCall { - call := m.allocateInstr().asCall(directCallee, calleeABI) - m.insert(call) - } else { - ptrOp := m.getOperand_Mem_Reg(m.c.ValueDefinition(indirectCalleePtr)) - callInd := m.allocateInstr().asCallIndirect(ptrOp, calleeABI) - m.insert(callInd) - } - - if isMemmove { - for i := regalloc.RealReg(0); i < 16; i++ { - m.insert(m.allocateInstr().asNopUseReg(regInfo.RealRegToVReg[xmm0+i])) - } - } - - var index int - r1, rs := si.Returns() - if r1.Valid() { - m.callerGenFunctionReturnVReg(calleeABI, 0, m.c.VRegOf(r1), stackSlotSize) - index++ - } - - for _, r := range rs { - m.callerGenFunctionReturnVReg(calleeABI, index, m.c.VRegOf(r), stackSlotSize) - index++ - } -} - -// callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the -// caller side of the function call. -func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def backend.SSAValueDefinition, stackSlotSize int64) { - arg := &a.Args[argIndex] - if def.IsFromInstr() { - // Constant instructions are inlined. - if inst := def.Instr; inst.Constant() { - m.insertLoadConstant(inst, reg) - } - } - if arg.Kind == backend.ABIArgKindReg { - m.InsertMove(arg.Reg, reg, arg.Type) - } else { - store := m.allocateInstr() - mem := newOperandMem(m.newAmodeImmReg( - // -stackSlotSize because the stack pointer is not yet decreased. - uint32(arg.Offset-stackSlotSize), rspVReg)) - switch arg.Type { - case ssa.TypeI32: - store.asMovRM(reg, mem, 4) - case ssa.TypeI64: - store.asMovRM(reg, mem, 8) - case ssa.TypeF32: - store.asXmmMovRM(sseOpcodeMovss, reg, mem) - case ssa.TypeF64: - store.asXmmMovRM(sseOpcodeMovsd, reg, mem) - case ssa.TypeV128: - store.asXmmMovRM(sseOpcodeMovdqu, reg, mem) - default: - panic("BUG") - } - m.insert(store) - } -} - -func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex int, reg regalloc.VReg, stackSlotSize int64) { - r := &a.Rets[retIndex] - if r.Kind == backend.ABIArgKindReg { - m.InsertMove(reg, r.Reg, r.Type) - } else { - load := m.allocateInstr() - mem := newOperandMem(m.newAmodeImmReg( - // -stackSlotSize because the stack pointer is not yet decreased. - uint32(a.ArgStackSize+r.Offset-stackSlotSize), rspVReg)) - switch r.Type { - case ssa.TypeI32: - load.asMovzxRmR(extModeLQ, mem, reg) - case ssa.TypeI64: - load.asMov64MR(mem, reg) - case ssa.TypeF32: - load.asXmmUnaryRmR(sseOpcodeMovss, mem, reg) - case ssa.TypeF64: - load.asXmmUnaryRmR(sseOpcodeMovsd, mem, reg) - case ssa.TypeV128: - load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, reg) - default: - panic("BUG") - } - m.insert(load) - } -} - -// InsertMove implements backend.Machine. 
-func (m *machine) InsertMove(dst, src regalloc.VReg, typ ssa.Type) { - switch typ { - case ssa.TypeI32, ssa.TypeI64: - i := m.allocateInstr().asMovRR(src, dst, typ.Bits() == 64) - m.insert(i) - case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - var op sseOpcode - switch typ { - case ssa.TypeF32: - op = sseOpcodeMovss - case ssa.TypeF64: - op = sseOpcodeMovsd - case ssa.TypeV128: - op = sseOpcodeMovdqa - } - i := m.allocateInstr().asXmmUnaryRmR(op, newOperandReg(src), dst) - m.insert(i) - default: - panic("BUG") - } -} - -// Format implements backend.Machine. -func (m *machine) Format() string { - begins := map[*instruction]label{} - for l := label(0); l < m.nextLabel; l++ { - pos := m.labelPositionPool.Get(int(l)) - if pos != nil { - begins[pos.begin] = l - } - } - - var lines []string - for cur := m.rootInstr; cur != nil; cur = cur.next { - if l, ok := begins[cur]; ok { - var labelStr string - if l <= m.maxSSABlockID { - labelStr = fmt.Sprintf("%s (SSA Block: blk%d):", l, l) - } else { - labelStr = fmt.Sprintf("%s:", l) - } - lines = append(lines, labelStr) - } - if cur.kind == nop0 { - continue - } - lines = append(lines, "\t"+cur.String()) - } - for _, vc := range m.consts { - if vc._var == nil { - lines = append(lines, fmt.Sprintf("%s: const [%d %d]", vc.label, vc.lo, vc.hi)) - } else { - lines = append(lines, fmt.Sprintf("%s: const %#x", vc.label, vc._var)) - } - } - return "\n" + strings.Join(lines, "\n") + "\n" -} - -func (m *machine) encodeWithoutSSA(root *instruction) { - m.labelResolutionPends = m.labelResolutionPends[:0] - bufPtr := m.c.BufPtr() - for cur := root; cur != nil; cur = cur.next { - offset := int64(len(*bufPtr)) - if cur.kind == nop0 { - l := cur.nop0Label() - pos := m.labelPositionPool.Get(int(l)) - if pos != nil { - pos.binaryOffset = offset - } - } - - needLabelResolution := cur.encode(m.c) - if needLabelResolution { - m.labelResolutionPends = append(m.labelResolutionPends, - labelResolutionPend{instr: cur, imm32Offset: int64(len(*bufPtr)) - 4}, - ) - } - } - - for i := range m.labelResolutionPends { - p := &m.labelResolutionPends[i] - switch p.instr.kind { - case jmp, jmpIf, lea: - target := p.instr.jmpLabel() - targetOffset := m.labelPositionPool.Get(int(target)).binaryOffset - imm32Offset := p.imm32Offset - jmpOffset := int32(targetOffset - (p.imm32Offset + 4)) // +4 because RIP points to the next instruction. - binary.LittleEndian.PutUint32((*bufPtr)[imm32Offset:], uint32(jmpOffset)) - default: - panic("BUG") - } - } -} - -// Encode implements backend.Machine Encode. 
-func (m *machine) Encode(ctx context.Context) (err error) { - bufPtr := m.c.BufPtr() - - var fn string - var fnIndex int - var labelPosToLabel map[*labelPosition]label - if wazevoapi.PerfMapEnabled { - fn = wazevoapi.GetCurrentFunctionName(ctx) - labelPosToLabel = make(map[*labelPosition]label) - for i := 0; i <= m.labelPositionPool.MaxIDEncountered(); i++ { - pos := m.labelPositionPool.Get(i) - labelPosToLabel[pos] = label(i) - } - fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx) - } - - m.labelResolutionPends = m.labelResolutionPends[:0] - for _, pos := range m.orderedSSABlockLabelPos { - offset := int64(len(*bufPtr)) - pos.binaryOffset = offset - for cur := pos.begin; cur != pos.end.next; cur = cur.next { - offset := int64(len(*bufPtr)) - - switch cur.kind { - case nop0: - l := cur.nop0Label() - if pos := m.labelPositionPool.Get(int(l)); pos != nil { - pos.binaryOffset = offset - } - case sourceOffsetInfo: - m.c.AddSourceOffsetInfo(offset, cur.sourceOffsetInfo()) - } - - needLabelResolution := cur.encode(m.c) - if needLabelResolution { - m.labelResolutionPends = append(m.labelResolutionPends, - labelResolutionPend{instr: cur, instrOffset: offset, imm32Offset: int64(len(*bufPtr)) - 4}, - ) - } - } - - if wazevoapi.PerfMapEnabled { - l := labelPosToLabel[pos] - size := int64(len(*bufPtr)) - offset - wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, l)) - } - } - - for i := range m.consts { - offset := int64(len(*bufPtr)) - vc := &m.consts[i] - vc.labelPos.binaryOffset = offset - if vc._var == nil { - lo, hi := vc.lo, vc.hi - m.c.Emit8Bytes(lo) - m.c.Emit8Bytes(hi) - } else { - for _, b := range vc._var { - m.c.EmitByte(b) - } - } - } - - buf := *bufPtr - for i := range m.labelResolutionPends { - p := &m.labelResolutionPends[i] - switch p.instr.kind { - case jmp, jmpIf, lea, xmmUnaryRmR: - target := p.instr.jmpLabel() - targetOffset := m.labelPositionPool.Get(int(target)).binaryOffset - imm32Offset := p.imm32Offset - jmpOffset := int32(targetOffset - (p.imm32Offset + 4)) // +4 because RIP points to the next instruction. - binary.LittleEndian.PutUint32(buf[imm32Offset:], uint32(jmpOffset)) - case jmpTableIsland: - tableBegin := p.instrOffset - // Each entry is the offset from the beginning of the jmpTableIsland instruction in 8 bytes. - targets := m.jmpTableTargets[p.instr.u1] - for i, l := range targets { - targetOffset := m.labelPositionPool.Get(int(l)).binaryOffset - jmpOffset := targetOffset - tableBegin - binary.LittleEndian.PutUint64(buf[tableBegin+int64(i)*8:], uint64(jmpOffset)) - } - default: - panic("BUG") - } - } - return -} - -// ResolveRelocations implements backend.Machine. -func (m *machine) ResolveRelocations(refToBinaryOffset []int, _ int, binary []byte, relocations []backend.RelocationInfo, _ []int) { - for _, r := range relocations { - offset := r.Offset - calleeFnOffset := refToBinaryOffset[r.FuncRef] - // offset is the offset of the last 4 bytes of the call instruction. - callInstrOffsetBytes := binary[offset : offset+4] - diff := int64(calleeFnOffset) - (offset + 4) // +4 because we want the offset of the next instruction (In x64, RIP always points to the next instruction). - callInstrOffsetBytes[0] = byte(diff) - callInstrOffsetBytes[1] = byte(diff >> 8) - callInstrOffsetBytes[2] = byte(diff >> 16) - callInstrOffsetBytes[3] = byte(diff >> 24) - } -} - -// CallTrampolineIslandInfo implements backend.Machine CallTrampolineIslandInfo. 
-func (m *machine) CallTrampolineIslandInfo(_ int) (_, _ int, _ error) { return } - -func (m *machine) lowerIcmpToFlag(xd, yd backend.SSAValueDefinition, _64 bool) { - x := m.getOperand_Reg(xd) - y := m.getOperand_Mem_Imm32_Reg(yd) - cmp := m.allocateInstr().asCmpRmiR(true, y, x.reg(), _64) - m.insert(cmp) -} - -func (m *machine) lowerFcmpToFlags(instr *ssa.Instruction) (f1, f2 cond, and bool) { - x, y, c := instr.FcmpData() - switch c { - case ssa.FloatCmpCondEqual: - f1, f2 = condNP, condZ - and = true - case ssa.FloatCmpCondNotEqual: - f1, f2 = condP, condNZ - case ssa.FloatCmpCondLessThan: - f1 = condFromSSAFloatCmpCond(ssa.FloatCmpCondGreaterThan) - f2 = condInvalid - x, y = y, x - case ssa.FloatCmpCondLessThanOrEqual: - f1 = condFromSSAFloatCmpCond(ssa.FloatCmpCondGreaterThanOrEqual) - f2 = condInvalid - x, y = y, x - default: - f1 = condFromSSAFloatCmpCond(c) - f2 = condInvalid - } - - var opc sseOpcode - if x.Type() == ssa.TypeF32 { - opc = sseOpcodeUcomiss - } else { - opc = sseOpcodeUcomisd - } - - xr := m.getOperand_Reg(m.c.ValueDefinition(x)) - yr := m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) - m.insert(m.allocateInstr().asXmmCmpRmR(opc, yr, xr.reg())) - return -} - -// allocateInstr allocates an instruction. -func (m *machine) allocateInstr() *instruction { - instr := m.instrPool.Allocate() - if !m.regAllocStarted { - instr.addedBeforeRegAlloc = true - } - return instr -} - -func (m *machine) allocateNop() *instruction { - instr := m.allocateInstr() - instr.kind = nop0 - return instr -} - -func (m *machine) insert(i *instruction) { - m.pendingInstructions = append(m.pendingInstructions, i) -} - -func (m *machine) allocateBrTarget() (nop *instruction, l label) { //nolint - l, pos := m.allocateLabel() - nop = m.allocateInstr() - nop.asNop0WithLabel(l) - pos.begin, pos.end = nop, nop - return -} - -func (m *machine) allocateLabel() (label, *labelPosition) { - l := m.nextLabel - pos := m.labelPositionPool.GetOrAllocate(int(l)) - m.nextLabel++ - return l, pos -} - -func (m *machine) getVRegSpillSlotOffsetFromSP(id regalloc.VRegID, size byte) int64 { - offset, ok := m.spillSlots[id] - if !ok { - offset = m.spillSlotSize - m.spillSlots[id] = offset - m.spillSlotSize += int64(size) - } - return offset -} - -func (m *machine) copyTo(src regalloc.VReg, dst regalloc.VReg) { - mov := m.allocateInstr() - if src.RegType() == regalloc.RegTypeInt { - mov.asMovRR(src, dst, true) - } else { - mov.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandReg(src), dst) - } - m.insert(mov) -} - -func (m *machine) copyToTmp(v regalloc.VReg) regalloc.VReg { - typ := m.c.TypeOf(v) - tmp := m.c.AllocateVReg(typ) - m.copyTo(v, tmp) - return tmp -} - -func (m *machine) requiredStackSize() int64 { - return m.maxRequiredStackSizeForCalls + - m.frameSize() + - 16 + // Need for stack checking. - 16 // return address and the caller RBP. 
-} - -func (m *machine) frameSize() int64 { - s := m.clobberedRegSlotSize() + m.spillSlotSize - if s&0xf != 0 { - panic(fmt.Errorf("BUG: frame size %d is not 16-byte aligned", s)) - } - return s -} - -func (m *machine) clobberedRegSlotSize() int64 { - return int64(len(m.clobberedRegs) * 16) -} - -func (m *machine) lowerIDivRem(si *ssa.Instruction, isDiv bool, signed bool) { - x, y, execCtx := si.Arg3() - - dividend := m.getOperand_Reg(m.c.ValueDefinition(x)) - divisor := m.getOperand_Reg(m.c.ValueDefinition(y)) - ctxVReg := m.c.VRegOf(execCtx) - tmpGp := m.c.AllocateVReg(si.Return().Type()) - - m.copyTo(dividend.reg(), raxVReg) - m.insert(m.allocateInstr().asDefineUninitializedReg(rdxVReg)) - m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp)) - seq := m.allocateInstr().asIdivRemSequence(ctxVReg, divisor.reg(), tmpGp, isDiv, signed, x.Type().Bits() == 64) - m.insert(seq) - rd := m.c.VRegOf(si.Return()) - if isDiv { - m.copyTo(raxVReg, rd) - } else { - m.copyTo(rdxVReg, rd) - } -} - -func (m *machine) lowerIDivRemSequenceAfterRegAlloc(i *instruction) { - execCtx, divisor, tmpGp, isDiv, signed, _64 := i.idivRemSequenceData() - - dividend := raxVReg - - // Ensure yr is not zero. - test := m.allocateInstr() - test.asCmpRmiR(false, newOperandReg(divisor), divisor, _64) - m.insert(test) - - jnz := m.allocateInstr() - m.insert(jnz) - - nz := m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerDivisionByZero) - - // If not zero, we can proceed with the division. - jnz.asJmpIf(condNZ, newOperandLabel(nz)) - - var ifRemNeg1 *instruction - if signed { - var neg1 uint64 - if _64 { - neg1 = 0xffffffffffffffff - } else { - neg1 = 0xffffffff - } - m.lowerIconst(tmpGp, neg1, _64) - - if isDiv { - // For signed division, we have to have branches for "math.MinInt{32,64} / -1" - // case which results in the floating point exception via division error as - // the resulting value exceeds the maximum of signed int. - - // First, we check if the divisor is -1. - cmp := m.allocateInstr() - cmp.asCmpRmiR(true, newOperandReg(tmpGp), divisor, _64) - m.insert(cmp) - - ifNotNeg1 := m.allocateInstr() - m.insert(ifNotNeg1) - - var minInt uint64 - if _64 { - minInt = 0x8000000000000000 - } else { - minInt = 0x80000000 - } - m.lowerIconst(tmpGp, minInt, _64) - - // Next we check if the quotient is the most negative value for the signed integer, i.e. - // if we are trying to do (math.MinInt32 / -1) or (math.MinInt64 / -1) respectively. - cmp2 := m.allocateInstr() - cmp2.asCmpRmiR(true, newOperandReg(tmpGp), dividend, _64) - m.insert(cmp2) - - ifNotMinInt := m.allocateInstr() - m.insert(ifNotMinInt) - - // Trap if we are trying to do (math.MinInt32 / -1) or (math.MinInt64 / -1), - // as that is the overflow in division as the result becomes 2^31 which is larger than - // the maximum of signed 32-bit int (2^31-1). - end := m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerOverflow) - ifNotNeg1.asJmpIf(condNZ, newOperandLabel(end)) - ifNotMinInt.asJmpIf(condNZ, newOperandLabel(end)) - } else { - // If it is remainder, zeros DX register and compare the divisor to -1. - xor := m.allocateInstr().asZeros(rdxVReg) - m.insert(xor) - - // We check if the divisor is -1. - cmp := m.allocateInstr() - cmp.asCmpRmiR(true, newOperandReg(tmpGp), divisor, _64) - m.insert(cmp) - - ifRemNeg1 = m.allocateInstr() - m.insert(ifRemNeg1) - } - - // Sign-extend DX register to have 2*x.Type().Bits() dividend over DX and AX registers. 
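(The guards built above encode Wasm's trap rules for signed division before the CDQ/CQO sign-extension and the DIV itself are emitted: a zero divisor traps, MinInt / -1 traps with integer overflow, and MinInt % -1 is defined to be 0, which is why the rem path only zeroes RDX. A scalar Go model of those rules, with hypothetical names, for illustration.)

package main

import (
	"errors"
	"fmt"
	"math"
)

var (
	errDivByZero = errors.New("integer division by zero")
	errOverflow  = errors.New("integer overflow")
)

// signedDivRem32 models the trap rules the lowered sequence enforces for i32.div_s / i32.rem_s.
func signedDivRem32(x, y int32, isDiv bool) (int32, error) {
	if y == 0 {
		return 0, errDivByZero
	}
	if isDiv && x == math.MinInt32 && y == -1 {
		return 0, errOverflow // the quotient 2^31 is not representable
	}
	if !isDiv && y == -1 {
		return 0, nil // x % -1 is always 0; no trap
	}
	if isDiv {
		return x / y, nil
	}
	return x % y, nil
}

func main() {
	fmt.Println(signedDivRem32(math.MinInt32, -1, true))  // 0 integer overflow
	fmt.Println(signedDivRem32(math.MinInt32, -1, false)) // 0 <nil>
	fmt.Println(signedDivRem32(7, 2, false))              // 1 <nil>
}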
- sed := m.allocateInstr() - sed.asSignExtendData(_64) - m.insert(sed) - } else { - // Zeros DX register to have 2*x.Type().Bits() dividend over DX and AX registers. - zeros := m.allocateInstr().asZeros(rdxVReg) - m.insert(zeros) - } - - div := m.allocateInstr() - div.asDiv(newOperandReg(divisor), signed, _64) - m.insert(div) - - nop, end := m.allocateBrTarget() - m.insert(nop) - // If we are compiling a Rem instruction, when the divisor is -1 we land at the end of the function. - if ifRemNeg1 != nil { - ifRemNeg1.asJmpIf(condZ, newOperandLabel(end)) - } -} - -func (m *machine) lowerRound(instr *ssa.Instruction, imm roundingMode) { - x := instr.Arg() - if !x.Type().IsFloat() { - panic("BUG?") - } - var op sseOpcode - if x.Type().Bits() == 64 { - op = sseOpcodeRoundsd - } else { - op = sseOpcodeRoundss - } - - xDef := m.c.ValueDefinition(x) - rm := m.getOperand_Mem_Reg(xDef) - rd := m.c.VRegOf(instr.Return()) - - xmm := m.allocateInstr().asXmmUnaryRmRImm(op, uint8(imm), rm, rd) - m.insert(xmm) -} - -func (m *machine) lowerFminFmax(instr *ssa.Instruction) { - x, y := instr.Arg2() - if !x.Type().IsFloat() { - panic("BUG?") - } - - _64 := x.Type().Bits() == 64 - isMin := instr.Opcode() == ssa.OpcodeFmin - var minMaxOp sseOpcode - - switch { - case _64 && isMin: - minMaxOp = sseOpcodeMinpd - case _64 && !isMin: - minMaxOp = sseOpcodeMaxpd - case !_64 && isMin: - minMaxOp = sseOpcodeMinps - case !_64 && !isMin: - minMaxOp = sseOpcodeMaxps - } - - xDef, yDef := m.c.ValueDefinition(x), m.c.ValueDefinition(y) - rm := m.getOperand_Reg(xDef) - // We cannot ensure that y is aligned to 16 bytes, so we have to use it on reg. - rn := m.getOperand_Reg(yDef) - rd := m.c.VRegOf(instr.Return()) - - tmp := m.copyToTmp(rm.reg()) - - // Check if this is (either x1 or x2 is NaN) or (x1 equals x2) case. - cmp := m.allocateInstr() - if _64 { - cmp.asXmmCmpRmR(sseOpcodeUcomisd, rn, tmp) - } else { - cmp.asXmmCmpRmR(sseOpcodeUcomiss, rn, tmp) - } - m.insert(cmp) - - // At this point, we have the three cases of conditional flags below - // (See https://www.felixcloutier.com/x86/ucomiss#operation for detail.) - // - // 1) Two values are NaN-free and different: All flags are cleared. - // 2) Two values are NaN-free and equal: Only ZF flags is set. - // 3) One of Two values is NaN: ZF, PF and CF flags are set. - - // Jump instruction to handle 1) case by checking the ZF flag - // as ZF is only set for 2) and 3) cases. - nanFreeOrDiffJump := m.allocateInstr() - m.insert(nanFreeOrDiffJump) - - // Start handling 2) and 3). - - // Jump if one of two values is NaN by checking the parity flag (PF). - ifIsNan := m.allocateInstr() - m.insert(ifIsNan) - - // Start handling 2) NaN-free and equal. - - // Before we exit this case, we have to ensure that positive zero (or negative zero for min instruction) is - // returned if two values are positive and negative zeros. - var op sseOpcode - switch { - case !_64 && isMin: - op = sseOpcodeOrps - case _64 && isMin: - op = sseOpcodeOrpd - case !_64 && !isMin: - op = sseOpcodeAndps - case _64 && !isMin: - op = sseOpcodeAndpd - } - orAnd := m.allocateInstr() - orAnd.asXmmRmR(op, rn, tmp) - m.insert(orAnd) - - // Done, jump to end. - sameExitJump := m.allocateInstr() - m.insert(sameExitJump) - - // Start handling 3) either is NaN. - isNanTarget, isNan := m.allocateBrTarget() - m.insert(isNanTarget) - ifIsNan.asJmpIf(condP, newOperandLabel(isNan)) - - // We emit the ADD instruction to produce the NaN in tmp. 
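(Adding the two operands is a cheap way to materialize a NaN whenever either input is NaN, which is what Wasm fmin/fmax must return. The overall semantics this routine lowers can be modelled in scalar Go roughly as follows; a sketch assuming f64 operands, not the actual lowering.)

package main

import (
	"fmt"
	"math"
)

// wasmFmin models the semantics lowerFminFmax implements for f64:
// NaN if either input is NaN, -0 preferred over +0, otherwise the smaller value.
func wasmFmin(a, b float64) float64 {
	switch {
	case math.IsNaN(a) || math.IsNaN(b):
		return math.NaN() // the lowered code produces this via the ADDSD on the NaN branch
	case a == b:
		// a and b may be +0 and -0 here; OR-ing the bit patterns picks -0 (the ORPD step).
		return math.Float64frombits(math.Float64bits(a) | math.Float64bits(b))
	case a < b:
		return a
	default:
		return b
	}
}

func main() {
	fmt.Println(wasmFmin(math.Copysign(0, -1), 0)) // -0
	fmt.Println(wasmFmin(1, math.NaN()))           // NaN
	fmt.Println(wasmFmin(2, 3))                    // 2
}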
- add := m.allocateInstr() - if _64 { - add.asXmmRmR(sseOpcodeAddsd, rn, tmp) - } else { - add.asXmmRmR(sseOpcodeAddss, rn, tmp) - } - m.insert(add) - - // Exit from the NaN case branch. - nanExitJmp := m.allocateInstr() - m.insert(nanExitJmp) - - // Start handling 1). - doMinMaxTarget, doMinMax := m.allocateBrTarget() - m.insert(doMinMaxTarget) - nanFreeOrDiffJump.asJmpIf(condNZ, newOperandLabel(doMinMax)) - - // Now handle the NaN-free and different values case. - minMax := m.allocateInstr() - minMax.asXmmRmR(minMaxOp, rn, tmp) - m.insert(minMax) - - endNop, end := m.allocateBrTarget() - m.insert(endNop) - nanExitJmp.asJmp(newOperandLabel(end)) - sameExitJump.asJmp(newOperandLabel(end)) - - m.copyTo(tmp, rd) -} - -func (m *machine) lowerFcopysign(instr *ssa.Instruction) { - x, y := instr.Arg2() - if !x.Type().IsFloat() { - panic("BUG") - } - - _64 := x.Type().Bits() == 64 - - xDef, yDef := m.c.ValueDefinition(x), m.c.ValueDefinition(y) - rm := m.getOperand_Reg(xDef) - rn := m.getOperand_Reg(yDef) - rd := m.c.VRegOf(instr.Return()) - - // Clear the non-sign bits of src via AND with the mask. - var opAnd, opOr sseOpcode - var signMask uint64 - if _64 { - signMask, opAnd, opOr = 0x8000000000000000, sseOpcodeAndpd, sseOpcodeOrpd - } else { - signMask, opAnd, opOr = 0x80000000, sseOpcodeAndps, sseOpcodeOrps - } - - signBitReg := m.c.AllocateVReg(x.Type()) - m.lowerFconst(signBitReg, signMask, _64) - nonSignBitReg := m.c.AllocateVReg(x.Type()) - m.lowerFconst(nonSignBitReg, ^signMask, _64) - - // Extract the sign bits of rn. - and := m.allocateInstr().asXmmRmR(opAnd, rn, signBitReg) - m.insert(and) - - // Clear the sign bit of dst via AND with the non-sign bit mask. - xor := m.allocateInstr().asXmmRmR(opAnd, rm, nonSignBitReg) - m.insert(xor) - - // Copy the sign bits of src to dst via OR. 
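(Bit-wise, the three steps, AND with the sign mask, AND with its complement, then OR of the results, compute copysign exactly as in this scalar model; the names here are illustrative.)

package main

import (
	"fmt"
	"math"
)

// copysign64 mirrors the mask-and-or sequence: keep the magnitude bits of x
// and take only the sign bit of y.
func copysign64(x, y float64) float64 {
	const signMask = 0x8000_0000_0000_0000
	xb := math.Float64bits(x) &^ signMask // clear the sign bit of x (ANDPD with ^signMask)
	yb := math.Float64bits(y) & signMask  // extract the sign bit of y (ANDPD with signMask)
	return math.Float64frombits(xb | yb)  // combine them (ORPD)
}

func main() {
	fmt.Println(copysign64(3.5, -1.0)) // -3.5
	fmt.Println(copysign64(-2.0, 1.0)) // 2
}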
- or := m.allocateInstr().asXmmRmR(opOr, newOperandReg(signBitReg), nonSignBitReg) - m.insert(or) - - m.copyTo(nonSignBitReg, rd) -} - -func (m *machine) lowerBitcast(instr *ssa.Instruction) { - x, dstTyp := instr.BitcastData() - srcTyp := x.Type() - rn := m.getOperand_Reg(m.c.ValueDefinition(x)) - rd := m.c.VRegOf(instr.Return()) - switch { - case srcTyp == ssa.TypeF32 && dstTyp == ssa.TypeI32: - cvt := m.allocateInstr().asXmmToGpr(sseOpcodeMovd, rn.reg(), rd, false) - m.insert(cvt) - case srcTyp == ssa.TypeI32 && dstTyp == ssa.TypeF32: - cvt := m.allocateInstr().asGprToXmm(sseOpcodeMovd, rn, rd, false) - m.insert(cvt) - case srcTyp == ssa.TypeF64 && dstTyp == ssa.TypeI64: - cvt := m.allocateInstr().asXmmToGpr(sseOpcodeMovq, rn.reg(), rd, true) - m.insert(cvt) - case srcTyp == ssa.TypeI64 && dstTyp == ssa.TypeF64: - cvt := m.allocateInstr().asGprToXmm(sseOpcodeMovq, rn, rd, true) - m.insert(cvt) - default: - panic(fmt.Sprintf("invalid bitcast from %s to %s", srcTyp, dstTyp)) - } -} - -func (m *machine) lowerFcvtToSint(ctxVReg, rn, rd regalloc.VReg, src64, dst64, sat bool) { - var tmpXmm regalloc.VReg - if dst64 { - tmpXmm = m.c.AllocateVReg(ssa.TypeF64) - } else { - tmpXmm = m.c.AllocateVReg(ssa.TypeF32) - } - - m.insert(m.allocateInstr().asDefineUninitializedReg(tmpXmm)) - tmpGp, tmpGp2 := m.c.AllocateVReg(ssa.TypeI64), m.c.AllocateVReg(ssa.TypeI64) - m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp)) - m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp2)) - - m.insert(m.allocateFcvtToSintSequence(ctxVReg, rn, tmpGp, tmpGp2, tmpXmm, src64, dst64, sat)) - m.copyTo(tmpGp, rd) -} - -func (m *machine) lowerFcvtToSintSequenceAfterRegalloc(i *instruction) { - execCtx, src, tmpGp, tmpGp2, tmpXmm, src64, dst64, sat := i.fcvtToSintSequenceData() - var cmpOp, truncOp sseOpcode - if src64 { - cmpOp, truncOp = sseOpcodeUcomisd, sseOpcodeCvttsd2si - } else { - cmpOp, truncOp = sseOpcodeUcomiss, sseOpcodeCvttss2si - } - - trunc := m.allocateInstr() - trunc.asXmmToGpr(truncOp, src, tmpGp, dst64) - m.insert(trunc) - - // Check if the dst operand was INT_MIN, by checking it against 1. - cmp1 := m.allocateInstr() - cmp1.asCmpRmiR(true, newOperandImm32(1), tmpGp, dst64) - m.insert(cmp1) - - // If no overflow, then we are done. - doneTarget, done := m.allocateBrTarget() - ifNoOverflow := m.allocateInstr() - ifNoOverflow.asJmpIf(condNO, newOperandLabel(done)) - m.insert(ifNoOverflow) - - // Now, check for NaN. - cmpNan := m.allocateInstr() - cmpNan.asXmmCmpRmR(cmpOp, newOperandReg(src), src) - m.insert(cmpNan) - - // We allocate the "non-nan target" here, but we will insert it later. - notNanTarget, notNaN := m.allocateBrTarget() - ifNotNan := m.allocateInstr() - ifNotNan.asJmpIf(condNP, newOperandLabel(notNaN)) - m.insert(ifNotNan) - - if sat { - // If NaN and saturating, return 0. - zeroDst := m.allocateInstr().asZeros(tmpGp) - m.insert(zeroDst) - - jmpEnd := m.allocateInstr() - jmpEnd.asJmp(newOperandLabel(done)) - m.insert(jmpEnd) - - // Otherwise: - m.insert(notNanTarget) - - // Zero-out the tmp register. - zero := m.allocateInstr().asZeros(tmpXmm) - m.insert(zero) - - cmpXmm := m.allocateInstr().asXmmCmpRmR(cmpOp, newOperandReg(tmpXmm), src) - m.insert(cmpXmm) - - // if >= jump to end. - jmpEnd2 := m.allocateInstr() - jmpEnd2.asJmpIf(condB, newOperandLabel(done)) - m.insert(jmpEnd2) - - // Otherwise, saturate to INT_MAX. - if dst64 { - m.lowerIconst(tmpGp, math.MaxInt64, dst64) - } else { - m.lowerIconst(tmpGp, math.MaxInt32, dst64) - } - - } else { - - // If non-sat, NaN, trap. 
- m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeInvalidConversionToInteger) - - // Otherwise, we will jump here. - m.insert(notNanTarget) - - // jump over trap if src larger than threshold - condAboveThreshold := condNB - - // The magic constants are various combination of minInt for int[32|64] represented as float[32|64]. - var minInt uint64 - switch { - case src64 && dst64: - minInt = 0xc3e0000000000000 - case src64 && !dst64: - condAboveThreshold = condNBE - minInt = 0xC1E0_0000_0020_0000 - case !src64 && dst64: - minInt = 0xDF00_0000 - case !src64 && !dst64: - minInt = 0xCF00_0000 - } - - loadToGP := m.allocateInstr().asImm(tmpGp2, minInt, src64) - m.insert(loadToGP) - - movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpGp2), tmpXmm, src64) - m.insert(movToXmm) - - cmpXmm := m.allocateInstr().asXmmCmpRmR(cmpOp, newOperandReg(tmpXmm), src) - m.insert(cmpXmm) - - jmpIfLarger := m.allocateInstr() - checkPositiveTarget, checkPositive := m.allocateBrTarget() - jmpIfLarger.asJmpIf(condAboveThreshold, newOperandLabel(checkPositive)) - m.insert(jmpIfLarger) - - m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerOverflow) - - // If positive, it was a real overflow. - m.insert(checkPositiveTarget) - - // Zero out the temp register. - xorpd := m.allocateInstr() - xorpd.asXmmRmR(sseOpcodeXorpd, newOperandReg(tmpXmm), tmpXmm) - m.insert(xorpd) - - pos := m.allocateInstr() - pos.asXmmCmpRmR(cmpOp, newOperandReg(src), tmpXmm) - m.insert(pos) - - // If >= jump to end. - jmp := m.allocateInstr().asJmpIf(condNB, newOperandLabel(done)) - m.insert(jmp) - m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerOverflow) - } - - m.insert(doneTarget) -} - -func (m *machine) lowerFcvtToUint(ctxVReg, rn, rd regalloc.VReg, src64, dst64, sat bool) { - tmpXmm, tmpXmm2 := m.c.AllocateVReg(ssa.TypeF64), m.c.AllocateVReg(ssa.TypeF64) - m.insert(m.allocateInstr().asDefineUninitializedReg(tmpXmm)) - m.insert(m.allocateInstr().asDefineUninitializedReg(tmpXmm2)) - tmpGp, tmpGp2 := m.c.AllocateVReg(ssa.TypeI64), m.c.AllocateVReg(ssa.TypeI64) - m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp)) - m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp2)) - - m.insert(m.allocateFcvtToUintSequence( - ctxVReg, rn, tmpGp, tmpGp2, tmpXmm, tmpXmm2, src64, dst64, sat, - )) - m.copyTo(tmpGp, rd) -} - -func (m *machine) lowerFcvtToUintSequenceAfterRegalloc(i *instruction) { - execCtx, src, tmpGp, tmpGp2, tmpXmm, tmpXmm2, src64, dst64, sat := i.fcvtToUintSequenceData() - - var subOp, cmpOp, truncOp sseOpcode - if src64 { - subOp, cmpOp, truncOp = sseOpcodeSubsd, sseOpcodeUcomisd, sseOpcodeCvttsd2si - } else { - subOp, cmpOp, truncOp = sseOpcodeSubss, sseOpcodeUcomiss, sseOpcodeCvttss2si - } - - doneTarget, done := m.allocateBrTarget() - - switch { - case src64 && dst64: - loadToGP := m.allocateInstr().asImm(tmpGp, 0x43e0000000000000, true) - m.insert(loadToGP) - movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpGp), tmpXmm, true) - m.insert(movToXmm) - case src64 && !dst64: - loadToGP := m.allocateInstr().asImm(tmpGp, 0x41e0000000000000, true) - m.insert(loadToGP) - movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpGp), tmpXmm, true) - m.insert(movToXmm) - case !src64 && dst64: - loadToGP := m.allocateInstr().asImm(tmpGp, 0x5f000000, false) - m.insert(loadToGP) - movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpGp), tmpXmm, false) - m.insert(movToXmm) - case !src64 && !dst64: - loadToGP := m.allocateInstr().asImm(tmpGp, 
0x4f000000, false) - m.insert(loadToGP) - movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpGp), tmpXmm, false) - m.insert(movToXmm) - } - - cmp := m.allocateInstr() - cmp.asXmmCmpRmR(cmpOp, newOperandReg(tmpXmm), src) - m.insert(cmp) - - // If above `tmp` ("large threshold"), jump to `ifAboveThreshold` - ifAboveThresholdTarget, ifAboveThreshold := m.allocateBrTarget() - jmpIfAboveThreshold := m.allocateInstr() - jmpIfAboveThreshold.asJmpIf(condNB, newOperandLabel(ifAboveThreshold)) - m.insert(jmpIfAboveThreshold) - - ifNotNaNTarget, ifNotNaN := m.allocateBrTarget() - jmpIfNotNaN := m.allocateInstr() - jmpIfNotNaN.asJmpIf(condNP, newOperandLabel(ifNotNaN)) - m.insert(jmpIfNotNaN) - - // If NaN, handle the error condition. - if sat { - // On NaN, saturating, we just return 0. - zeros := m.allocateInstr().asZeros(tmpGp) - m.insert(zeros) - - jmpEnd := m.allocateInstr() - jmpEnd.asJmp(newOperandLabel(done)) - m.insert(jmpEnd) - } else { - // On NaN, non-saturating, we trap. - m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeInvalidConversionToInteger) - } - - // If not NaN, land here. - m.insert(ifNotNaNTarget) - - // Truncation happens here. - - trunc := m.allocateInstr() - trunc.asXmmToGpr(truncOp, src, tmpGp, dst64) - m.insert(trunc) - - // Check if the result is negative. - cmpNeg := m.allocateInstr() - cmpNeg.asCmpRmiR(true, newOperandImm32(0), tmpGp, dst64) - m.insert(cmpNeg) - - // If non-neg, jump to end. - jmpIfNonNeg := m.allocateInstr() - jmpIfNonNeg.asJmpIf(condNL, newOperandLabel(done)) - m.insert(jmpIfNonNeg) - - if sat { - // If the input was "small" (< 2**(width -1)), the only way to get an integer - // overflow is because the input was too small: saturate to the min value, i.e. 0. - zeros := m.allocateInstr().asZeros(tmpGp) - m.insert(zeros) - - jmpEnd := m.allocateInstr() - jmpEnd.asJmp(newOperandLabel(done)) - m.insert(jmpEnd) - } else { - // If not saturating, trap. - m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerOverflow) - } - - // If above the threshold, land here. - m.insert(ifAboveThresholdTarget) - - // tmpDiff := threshold - rn. - copySrc := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandReg(src), tmpXmm2) - m.insert(copySrc) - - sub := m.allocateInstr() - sub.asXmmRmR(subOp, newOperandReg(tmpXmm), tmpXmm2) // must be -0x8000000000000000 - m.insert(sub) - - trunc2 := m.allocateInstr() - trunc2.asXmmToGpr(truncOp, tmpXmm2, tmpGp, dst64) - m.insert(trunc2) - - // Check if the result is negative. - cmpNeg2 := m.allocateInstr().asCmpRmiR(true, newOperandImm32(0), tmpGp, dst64) - m.insert(cmpNeg2) - - ifNextLargeTarget, ifNextLarge := m.allocateBrTarget() - jmpIfNextLarge := m.allocateInstr() - jmpIfNextLarge.asJmpIf(condNL, newOperandLabel(ifNextLarge)) - m.insert(jmpIfNextLarge) - - if sat { - // The input was "large" (>= maxInt), so the only way to get an integer - // overflow is because the input was too large: saturate to the max value. - var maxInt uint64 - if dst64 { - maxInt = math.MaxUint64 - } else { - maxInt = math.MaxUint32 - } - m.lowerIconst(tmpGp, maxInt, dst64) - - jmpToEnd := m.allocateInstr() - jmpToEnd.asJmp(newOperandLabel(done)) - m.insert(jmpToEnd) - } else { - // If not saturating, trap. 
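// ----- editor's aside: illustrative sketch, not part of the deleted file -----
// What the above-threshold path emulates: CVTTSD2SI only produces signed
// results, so inputs at or above 2^63 (or 2^31 for 32-bit destinations) are
// biased down by the threshold, converted, and the bias is added back. A
// pure-Go sketch for f64 -> u64 with a hypothetical name, assuming
// `import "math"`; ok == false marks the cases where the non-saturating form traps.
func truncF64ToU64(f float64, sat bool) (v uint64, ok bool) {
	const two63 = float64(1 << 63)
	if math.IsNaN(f) {
		return 0, sat // NaN: 0 when saturating, trap otherwise.
	}
	t := math.Trunc(f)
	switch {
	case t < 0:
		return 0, sat // negative input: saturate to 0 or trap.
	case t < two63:
		return uint64(t), true // fits the signed range: one truncation suffices.
	case t >= two63*2:
		return math.MaxUint64, sat // >= 2^64: saturate to the max or trap.
	default:
		return uint64(t-two63) + 1<<63, true // bias down, convert, add the bias back.
	}
}
// ----- end aside -----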
- m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerOverflow) - } - - m.insert(ifNextLargeTarget) - - var op operand - if dst64 { - m.lowerIconst(tmpGp2, 0x8000000000000000, true) - op = newOperandReg(tmpGp2) - } else { - op = newOperandImm32(0x80000000) - } - - add := m.allocateInstr() - add.asAluRmiR(aluRmiROpcodeAdd, op, tmpGp, dst64) - m.insert(add) - - m.insert(doneTarget) -} - -func (m *machine) lowerFcvtFromSint(rn, rd operand, src64, dst64 bool) { - var op sseOpcode - if dst64 { - op = sseOpcodeCvtsi2sd - } else { - op = sseOpcodeCvtsi2ss - } - - trunc := m.allocateInstr() - trunc.asGprToXmm(op, rn, rd.reg(), src64) - m.insert(trunc) -} - -func (m *machine) lowerFcvtFromUint(rn, rd operand, src64, dst64 bool) { - var op sseOpcode - if dst64 { - op = sseOpcodeCvtsi2sd - } else { - op = sseOpcodeCvtsi2ss - } - - // Src is 32 bit, then we just perform the conversion with 64 bit width. - // - // See the following link for why we use 64bit conversion for unsigned 32bit integer sources: - // https://stackoverflow.com/questions/41495498/fpu-operations-generated-by-gcc-during-casting-integer-to-float. - // - // Here's the summary: - // >> CVTSI2SS is indeed designed for converting a signed integer to a scalar single-precision float, - // >> not an unsigned integer like you have here. So what gives? Well, a 64-bit processor has 64-bit wide - // >> registers available, so the unsigned 32-bit input values can be stored as signed 64-bit intermediate values, - // >> which allows CVTSI2SS to be used after all. - // - if !src64 { - // Before we convert, we have to clear the higher 32-bits of the 64-bit register - // to get the correct result. - tmp := m.c.AllocateVReg(ssa.TypeI32) - m.insert(m.allocateInstr().asMovzxRmR(extModeLQ, rn, tmp)) - m.insert(m.allocateInstr().asGprToXmm(op, newOperandReg(tmp), rd.reg(), true)) - return - } - - // If uint64, we have to do a bit more work. - endTarget, end := m.allocateBrTarget() - - var tmpXmm regalloc.VReg - if dst64 { - tmpXmm = m.c.AllocateVReg(ssa.TypeF64) - } else { - tmpXmm = m.c.AllocateVReg(ssa.TypeF32) - } - - // Check if the most significant bit (sign bit) is set. - test := m.allocateInstr() - test.asCmpRmiR(false, rn, rn.reg(), src64) - m.insert(test) - - // Jump if the sign bit is set. - ifSignTarget, ifSign := m.allocateBrTarget() - jmpIfNeg := m.allocateInstr() - jmpIfNeg.asJmpIf(condS, newOperandLabel(ifSign)) - m.insert(jmpIfNeg) - - // If the sign bit is not set, we could fit the unsigned int into float32/float64. - // So, we convert it to float and emit jump instruction to exit from this branch. - cvt := m.allocateInstr() - cvt.asGprToXmm(op, rn, tmpXmm, src64) - m.insert(cvt) - - // We are done, jump to end. - jmpEnd := m.allocateInstr() - jmpEnd.asJmp(newOperandLabel(end)) - m.insert(jmpEnd) - - // Now handling the case where sign-bit is set. 
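// ----- editor's aside: illustrative sketch, not part of the deleted file -----
// The instruction sequence listed in the comment below halves the value with
// the dropped bit folded back in ("round to odd"), converts the now
// non-negative number with the signed CVTSI2SD/SS, and doubles the result.
// A pure-Go sketch of that trick (hypothetical name):
func u64ToF64(v uint64) float64 {
	if int64(v) >= 0 {
		return float64(int64(v)) // sign bit clear: a single signed convert is enough.
	}
	half := (v >> 1) | (v & 1) // halve, keeping the lost bit as the new LSB.
	f := float64(int64(half))  // top bit is now clear, so the signed convert is valid.
	return f + f               // double back to the original magnitude.
}
// ----- end aside -----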
- // We emit the following sequences: - // mov %rn, %tmp - // shr 1, %tmp - // mov %rn, %tmp2 - // and 1, %tmp2 - // or %tmp2, %tmp - // cvtsi2ss %tmp, %xmm0 - // addsd %xmm0, %xmm0 - m.insert(ifSignTarget) - - tmp := m.copyToTmp(rn.reg()) - shr := m.allocateInstr() - shr.asShiftR(shiftROpShiftRightLogical, newOperandImm32(1), tmp, src64) - m.insert(shr) - - tmp2 := m.copyToTmp(rn.reg()) - and := m.allocateInstr() - and.asAluRmiR(aluRmiROpcodeAnd, newOperandImm32(1), tmp2, src64) - m.insert(and) - - or := m.allocateInstr() - or.asAluRmiR(aluRmiROpcodeOr, newOperandReg(tmp2), tmp, src64) - m.insert(or) - - cvt2 := m.allocateInstr() - cvt2.asGprToXmm(op, newOperandReg(tmp), tmpXmm, src64) - m.insert(cvt2) - - addsd := m.allocateInstr() - if dst64 { - addsd.asXmmRmR(sseOpcodeAddsd, newOperandReg(tmpXmm), tmpXmm) - } else { - addsd.asXmmRmR(sseOpcodeAddss, newOperandReg(tmpXmm), tmpXmm) - } - m.insert(addsd) - - m.insert(endTarget) - m.copyTo(tmpXmm, rd.reg()) -} - -func (m *machine) lowerVanyTrue(instr *ssa.Instruction) { - x := instr.Arg() - rm := m.getOperand_Reg(m.c.ValueDefinition(x)) - rd := m.c.VRegOf(instr.Return()) - - tmp := m.c.AllocateVReg(ssa.TypeI32) - - cmp := m.allocateInstr() - cmp.asXmmCmpRmR(sseOpcodePtest, rm, rm.reg()) - m.insert(cmp) - - setcc := m.allocateInstr() - setcc.asSetcc(condNZ, tmp) - m.insert(setcc) - - // Clear the irrelevant bits. - and := m.allocateInstr() - and.asAluRmiR(aluRmiROpcodeAnd, newOperandImm32(1), tmp, false) - m.insert(and) - - m.copyTo(tmp, rd) -} - -func (m *machine) lowerVallTrue(instr *ssa.Instruction) { - x, lane := instr.ArgWithLane() - var op sseOpcode - switch lane { - case ssa.VecLaneI8x16: - op = sseOpcodePcmpeqb - case ssa.VecLaneI16x8: - op = sseOpcodePcmpeqw - case ssa.VecLaneI32x4: - op = sseOpcodePcmpeqd - case ssa.VecLaneI64x2: - op = sseOpcodePcmpeqq - } - rm := m.getOperand_Reg(m.c.ValueDefinition(x)) - rd := m.c.VRegOf(instr.Return()) - - tmp := m.c.AllocateVReg(ssa.TypeV128) - - zeros := m.allocateInstr() - zeros.asZeros(tmp) - m.insert(zeros) - - pcmp := m.allocateInstr() - pcmp.asXmmRmR(op, rm, tmp) - m.insert(pcmp) - - test := m.allocateInstr() - test.asXmmCmpRmR(sseOpcodePtest, newOperandReg(tmp), tmp) - m.insert(test) - - tmp2 := m.c.AllocateVReg(ssa.TypeI32) - - setcc := m.allocateInstr() - setcc.asSetcc(condZ, tmp2) - m.insert(setcc) - - // Clear the irrelevant bits. 
- and := m.allocateInstr() - and.asAluRmiR(aluRmiROpcodeAnd, newOperandImm32(1), tmp2, false) - m.insert(and) - - m.copyTo(tmp2, rd) -} - -func (m *machine) lowerVhighBits(instr *ssa.Instruction) { - x, lane := instr.ArgWithLane() - rm := m.getOperand_Reg(m.c.ValueDefinition(x)) - rd := m.c.VRegOf(instr.Return()) - switch lane { - case ssa.VecLaneI8x16: - mov := m.allocateInstr() - mov.asXmmToGpr(sseOpcodePmovmskb, rm.reg(), rd, false) - m.insert(mov) - - case ssa.VecLaneI16x8: - // When we have: - // R1 = [R1(w1), R1(w2), R1(w3), R1(w4), R1(w5), R1(w6), R1(w7), R1(v8)] - // R2 = [R2(w1), R2(w2), R2(w3), R2(v4), R2(w5), R2(w6), R2(w7), R2(v8)] - // where RX(wn) is n-th signed word (16-bit) of RX register, - // - // "PACKSSWB R1, R2" produces - // R1 = [ - // byte_sat(R1(w1)), byte_sat(R1(w2)), byte_sat(R1(w3)), byte_sat(R1(w4)), - // byte_sat(R1(w5)), byte_sat(R1(w6)), byte_sat(R1(w7)), byte_sat(R1(w8)), - // byte_sat(R2(w1)), byte_sat(R2(w2)), byte_sat(R2(w3)), byte_sat(R2(w4)), - // byte_sat(R2(w5)), byte_sat(R2(w6)), byte_sat(R2(w7)), byte_sat(R2(w8)), - // ] - // where R1 is the destination register, and - // byte_sat(w) = int8(w) if w fits as signed 8-bit, - // 0x80 if w is less than 0x80 - // 0x7F if w is greater than 0x7f - // - // See https://www.felixcloutier.com/x86/packsswb:packssdw for detail. - // - // Therefore, v.register ends up having i-th and (i+8)-th bit set if i-th lane is negative (for i in 0..8). - tmp := m.copyToTmp(rm.reg()) - res := m.c.AllocateVReg(ssa.TypeI32) - - pak := m.allocateInstr() - pak.asXmmRmR(sseOpcodePacksswb, rm, tmp) - m.insert(pak) - - mov := m.allocateInstr() - mov.asXmmToGpr(sseOpcodePmovmskb, tmp, res, false) - m.insert(mov) - - // Clear the higher bits than 8. - shr := m.allocateInstr() - shr.asShiftR(shiftROpShiftRightLogical, newOperandImm32(8), res, false) - m.insert(shr) - - m.copyTo(res, rd) - - case ssa.VecLaneI32x4: - mov := m.allocateInstr() - mov.asXmmToGpr(sseOpcodeMovmskps, rm.reg(), rd, true) - m.insert(mov) - - case ssa.VecLaneI64x2: - mov := m.allocateInstr() - mov.asXmmToGpr(sseOpcodeMovmskpd, rm.reg(), rd, true) - m.insert(mov) - } -} - -func (m *machine) lowerVbnot(instr *ssa.Instruction) { - x := instr.Arg() - xDef := m.c.ValueDefinition(x) - rm := m.getOperand_Reg(xDef) - rd := m.c.VRegOf(instr.Return()) - - tmp := m.copyToTmp(rm.reg()) - tmp2 := m.c.AllocateVReg(ssa.TypeV128) - - // Ensure tmp2 is considered defined by regalloc. - m.insert(m.allocateInstr().asDefineUninitializedReg(tmp2)) - - // Set all bits on tmp register. - pak := m.allocateInstr() - pak.asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmp2), tmp2) - m.insert(pak) - - // Then XOR with tmp to reverse all bits on v.register. 
- xor := m.allocateInstr() - xor.asXmmRmR(sseOpcodePxor, newOperandReg(tmp2), tmp) - m.insert(xor) - - m.copyTo(tmp, rd) -} - -func (m *machine) lowerSplat(x, ret ssa.Value, lane ssa.VecLane) { - tmpDst := m.c.AllocateVReg(ssa.TypeV128) - m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst)) - - switch lane { - case ssa.VecLaneI8x16: - tmp := m.c.AllocateVReg(ssa.TypeV128) - m.insert(m.allocateInstr().asDefineUninitializedReg(tmp)) - xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, 0, xx, tmpDst)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmp), tmpDst)) - case ssa.VecLaneI16x8: - xx := m.getOperand_Reg(m.c.ValueDefinition(x)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 0, xx, tmpDst)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 1, xx, tmpDst)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst)) - case ssa.VecLaneI32x4: - xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, 0, xx, tmpDst)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst)) - case ssa.VecLaneI64x2: - xx := m.getOperand_Reg(m.c.ValueDefinition(x)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, xx, tmpDst)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, xx, tmpDst)) - case ssa.VecLaneF32x4: - xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeInsertps, 0, xx, tmpDst)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst)) - case ssa.VecLaneF64x2: - xx := m.getOperand_Reg(m.c.ValueDefinition(x)) - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, xx, tmpDst)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMovlhps, xx, tmpDst)) - default: - panic(fmt.Sprintf("invalid lane type: %s", lane)) - } - - m.copyTo(tmpDst, m.c.VRegOf(ret)) -} - -func (m *machine) lowerShuffle(x, y ssa.Value, lo, hi uint64, ret ssa.Value) { - var xMask, yMask [2]uint64 - for i := 0; i < 8; i++ { - loLane := byte(lo >> (i * 8)) - if loLane < 16 { - xMask[0] |= uint64(loLane) << (i * 8) - yMask[0] |= uint64(0x80) << (i * 8) - } else { - xMask[0] |= uint64(0x80) << (i * 8) - yMask[0] |= uint64(loLane-16) << (i * 8) - } - hiLane := byte(hi >> (i * 8)) - if hiLane < 16 { - xMask[1] |= uint64(hiLane) << (i * 8) - yMask[1] |= uint64(0x80) << (i * 8) - } else { - xMask[1] |= uint64(0x80) << (i * 8) - yMask[1] |= uint64(hiLane-16) << (i * 8) - } - } - - xl, xmaskPos := m.allocateLabel() - m.consts = append(m.consts, _const{lo: xMask[0], hi: xMask[1], label: xl, labelPos: xmaskPos}) - yl, ymaskPos := m.allocateLabel() - m.consts = append(m.consts, _const{lo: yMask[0], hi: yMask[1], label: yl, labelPos: ymaskPos}) - - xx, yy := m.getOperand_Reg(m.c.ValueDefinition(x)), m.getOperand_Reg(m.c.ValueDefinition(y)) - tmpX, tmpY := m.copyToTmp(xx.reg()), m.copyToTmp(yy.reg()) - - // Apply mask to X. - tmp := m.c.AllocateVReg(ssa.TypeV128) - loadMaskLo := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(xl)), tmp) - m.insert(loadMaskLo) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmp), tmpX)) - - // Apply mask to Y. 
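// ----- editor's aside: illustrative sketch, not part of the deleted file -----
// The decomposition used by lowerShuffle, in pure Go: PSHUFB zeroes any byte
// whose mask byte has bit 7 (0x80) set, so one mask selects the lanes coming
// from x, the other selects the lanes coming from y, and an OR merges the two
// partial results.
func shuffle16(x, y, idx [16]byte) (out [16]byte) {
	pshufb := func(v, mask [16]byte) (r [16]byte) {
		for i, m := range mask {
			if m&0x80 == 0 {
				r[i] = v[m&0x0f]
			} // otherwise r[i] stays 0, as PSHUFB does.
		}
		return
	}
	var xMask, yMask [16]byte
	for i, l := range idx { // l is in [0, 32): < 16 selects from x, >= 16 from y.
		if l < 16 {
			xMask[i], yMask[i] = l, 0x80
		} else {
			xMask[i], yMask[i] = 0x80, l-16
		}
	}
	xs, ys := pshufb(x, xMask), pshufb(y, yMask)
	for i := range out {
		out[i] = xs[i] | ys[i]
	}
	return
}
// ----- end aside -----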
- loadMaskHi := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(yl)), tmp) - m.insert(loadMaskHi) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmp), tmpY)) - - // Combine the results. - m.insert(m.allocateInstr().asXmmRmR(sseOpcodeOrps, newOperandReg(tmpX), tmpY)) - - m.copyTo(tmpY, m.c.VRegOf(ret)) -} - -func (m *machine) lowerVbBinOpUnaligned(op sseOpcode, x, y, ret ssa.Value) { - rn := m.getOperand_Reg(m.c.ValueDefinition(x)) - rm := m.getOperand_Reg(m.c.ValueDefinition(y)) - rd := m.c.VRegOf(ret) - - tmp := m.copyToTmp(rn.reg()) - - binOp := m.allocateInstr() - binOp.asXmmRmR(op, rm, tmp) - m.insert(binOp) - - m.copyTo(tmp, rd) -} - -func (m *machine) lowerVbBinOp(op sseOpcode, x, y, ret ssa.Value) { - rn := m.getOperand_Reg(m.c.ValueDefinition(x)) - rm := m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) - rd := m.c.VRegOf(ret) - - tmp := m.copyToTmp(rn.reg()) - - binOp := m.allocateInstr() - binOp.asXmmRmR(op, rm, tmp) - m.insert(binOp) - - m.copyTo(tmp, rd) -} - -func (m *machine) lowerVFcmp(x, y ssa.Value, c ssa.FloatCmpCond, ret ssa.Value, lane ssa.VecLane) { - var cmpOp sseOpcode - switch lane { - case ssa.VecLaneF32x4: - cmpOp = sseOpcodeCmpps - case ssa.VecLaneF64x2: - cmpOp = sseOpcodeCmppd - default: - panic(fmt.Sprintf("invalid lane type: %s", lane)) - } - - xx, yy := m.c.ValueDefinition(x), m.c.ValueDefinition(y) - var cmpImm cmpPred - switch c { - case ssa.FloatCmpCondGreaterThan: - yy, xx = xx, yy - cmpImm = cmpPredLT_OS - case ssa.FloatCmpCondGreaterThanOrEqual: - yy, xx = xx, yy - cmpImm = cmpPredLE_OS - case ssa.FloatCmpCondEqual: - cmpImm = cmpPredEQ_OQ - case ssa.FloatCmpCondNotEqual: - cmpImm = cmpPredNEQ_UQ - case ssa.FloatCmpCondLessThan: - cmpImm = cmpPredLT_OS - case ssa.FloatCmpCondLessThanOrEqual: - cmpImm = cmpPredLE_OS - default: - panic(fmt.Sprintf("invalid float comparison condition: %s", c)) - } - - tmp := m.c.AllocateVReg(ssa.TypeV128) - xxx := m.getOperand_Mem_Reg(xx) - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, xxx, tmp)) - - rm := m.getOperand_Mem_Reg(yy) - m.insert(m.allocateInstr().asXmmRmRImm(cmpOp, byte(cmpImm), rm, tmp)) - - m.copyTo(tmp, m.c.VRegOf(ret)) -} - -func (m *machine) lowerVIcmp(x, y ssa.Value, c ssa.IntegerCmpCond, ret ssa.Value, lane ssa.VecLane) { - var eq, gt, maxu, minu, mins sseOpcode - switch lane { - case ssa.VecLaneI8x16: - eq, gt, maxu, minu, mins = sseOpcodePcmpeqb, sseOpcodePcmpgtb, sseOpcodePmaxub, sseOpcodePminub, sseOpcodePminsb - case ssa.VecLaneI16x8: - eq, gt, maxu, minu, mins = sseOpcodePcmpeqw, sseOpcodePcmpgtw, sseOpcodePmaxuw, sseOpcodePminuw, sseOpcodePminsw - case ssa.VecLaneI32x4: - eq, gt, maxu, minu, mins = sseOpcodePcmpeqd, sseOpcodePcmpgtd, sseOpcodePmaxud, sseOpcodePminud, sseOpcodePminsd - case ssa.VecLaneI64x2: - eq, gt = sseOpcodePcmpeqq, sseOpcodePcmpgtq - default: - panic(fmt.Sprintf("invalid lane type: %s", lane)) - } - - tmp := m.c.AllocateVReg(ssa.TypeV128) - var op operand - switch c { - case ssa.IntegerCmpCondSignedLessThanOrEqual: - if lane == ssa.VecLaneI64x2 { - x := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) - // Copy x to tmp. - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, x, tmp)) - op = m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) - } else { - y := m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) - // Copy y to tmp. 
- m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, y, tmp)) - op = m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) - } - case ssa.IntegerCmpCondSignedGreaterThanOrEqual: - if lane == ssa.VecLaneI64x2 { - y := m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) - // Copy y to tmp. - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, y, tmp)) - op = m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) - } else { - x := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) - // Copy x to tmp. - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, x, tmp)) - op = m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) - } - case ssa.IntegerCmpCondSignedLessThan, ssa.IntegerCmpCondUnsignedLessThan, ssa.IntegerCmpCondUnsignedLessThanOrEqual: - y := m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) - // Copy y to tmp. - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, y, tmp)) - op = m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) - default: - x := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) - // Copy x to tmp. - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, x, tmp)) - op = m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) - } - - switch c { - case ssa.IntegerCmpCondEqual: - m.insert(m.allocateInstr().asXmmRmR(eq, op, tmp)) - case ssa.IntegerCmpCondNotEqual: - // First we compare for equality. - m.insert(m.allocateInstr().asXmmRmR(eq, op, tmp)) - // Then flip the bits. To do so, we set all bits on tmp2. - tmp2 := m.c.AllocateVReg(ssa.TypeV128) - m.insert(m.allocateInstr().asDefineUninitializedReg(tmp2)) - m.insert(m.allocateInstr().asXmmRmR(eq, newOperandReg(tmp2), tmp2)) - // And then xor with tmp. - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp2), tmp)) - case ssa.IntegerCmpCondSignedGreaterThan, ssa.IntegerCmpCondSignedLessThan: - m.insert(m.allocateInstr().asXmmRmR(gt, op, tmp)) - case ssa.IntegerCmpCondSignedGreaterThanOrEqual, ssa.IntegerCmpCondSignedLessThanOrEqual: - if lane == ssa.VecLaneI64x2 { - m.insert(m.allocateInstr().asXmmRmR(gt, op, tmp)) - // Then flip the bits. To do so, we set all bits on tmp2. - tmp2 := m.c.AllocateVReg(ssa.TypeV128) - m.insert(m.allocateInstr().asDefineUninitializedReg(tmp2)) - m.insert(m.allocateInstr().asXmmRmR(eq, newOperandReg(tmp2), tmp2)) - // And then xor with tmp. - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp2), tmp)) - } else { - // First take min of x and y. - m.insert(m.allocateInstr().asXmmRmR(mins, op, tmp)) - // Then compare for equality. - m.insert(m.allocateInstr().asXmmRmR(eq, op, tmp)) - } - case ssa.IntegerCmpCondUnsignedGreaterThan, ssa.IntegerCmpCondUnsignedLessThan: - // First maxu of x and y. - m.insert(m.allocateInstr().asXmmRmR(maxu, op, tmp)) - // Then compare for equality. - m.insert(m.allocateInstr().asXmmRmR(eq, op, tmp)) - // Then flip the bits. To do so, we set all bits on tmp2. - tmp2 := m.c.AllocateVReg(ssa.TypeV128) - m.insert(m.allocateInstr().asDefineUninitializedReg(tmp2)) - m.insert(m.allocateInstr().asXmmRmR(eq, newOperandReg(tmp2), tmp2)) - // And then xor with tmp. 
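// ----- editor's aside: illustrative sketch, not part of the deleted file -----
// SSE has no unsigned greater-than, so the unsigned comparisons here are
// synthesized from PMINU/PMAXU plus PCMPEQ. Per lane, the identities being
// used are (shown for bytes, hypothetical helper names):
func geU8(x, y uint8) bool { // x >= y  <=>  min(x, y) == y
	mn := x
	if y < mn {
		mn = y
	}
	return mn == y
}

func gtU8(x, y uint8) bool { // x > y  <=>  !(max(x, y) == y), hence the PXOR bit-flip.
	mx := x
	if y > mx {
		mx = y
	}
	return mx != y
}
// ----- end aside -----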
- m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp2), tmp)) - case ssa.IntegerCmpCondUnsignedGreaterThanOrEqual, ssa.IntegerCmpCondUnsignedLessThanOrEqual: - m.insert(m.allocateInstr().asXmmRmR(minu, op, tmp)) - m.insert(m.allocateInstr().asXmmRmR(eq, op, tmp)) - default: - panic("BUG") - } - - m.copyTo(tmp, m.c.VRegOf(ret)) -} - -func (m *machine) lowerVbandnot(instr *ssa.Instruction, op sseOpcode) { - x, y := instr.Arg2() - xDef := m.c.ValueDefinition(x) - yDef := m.c.ValueDefinition(y) - rm, rn := m.getOperand_Reg(xDef), m.getOperand_Reg(yDef) - rd := m.c.VRegOf(instr.Return()) - - tmp := m.copyToTmp(rn.reg()) - - // pandn between rn, rm. - pand := m.allocateInstr() - pand.asXmmRmR(sseOpcodePandn, rm, tmp) - m.insert(pand) - - m.copyTo(tmp, rd) -} - -func (m *machine) lowerVbitselect(instr *ssa.Instruction) { - c, x, y := instr.SelectData() - xDef := m.c.ValueDefinition(x) - yDef := m.c.ValueDefinition(y) - rm, rn := m.getOperand_Reg(xDef), m.getOperand_Reg(yDef) - creg := m.getOperand_Reg(m.c.ValueDefinition(c)) - rd := m.c.VRegOf(instr.Return()) - - tmpC := m.copyToTmp(creg.reg()) - tmpX := m.copyToTmp(rm.reg()) - - // And between c, x (overwrites x). - pand := m.allocateInstr() - pand.asXmmRmR(sseOpcodePand, creg, tmpX) - m.insert(pand) - - // Andn between y, c (overwrites c). - pandn := m.allocateInstr() - pandn.asXmmRmR(sseOpcodePandn, rn, tmpC) - m.insert(pandn) - - por := m.allocateInstr() - por.asXmmRmR(sseOpcodePor, newOperandReg(tmpC), tmpX) - m.insert(por) - - m.copyTo(tmpX, rd) -} - -func (m *machine) lowerVFmin(instr *ssa.Instruction) { - x, y, lane := instr.Arg2WithLane() - rn := m.getOperand_Reg(m.c.ValueDefinition(x)) - rm := m.getOperand_Reg(m.c.ValueDefinition(y)) - rd := m.c.VRegOf(instr.Return()) - - var min, cmp, andn, or, srl /* shift right logical */ sseOpcode - var shiftNumToInverseNaN uint32 - if lane == ssa.VecLaneF32x4 { - min, cmp, andn, or, srl, shiftNumToInverseNaN = sseOpcodeMinps, sseOpcodeCmpps, sseOpcodeAndnps, sseOpcodeOrps, sseOpcodePsrld, 0xa - } else { - min, cmp, andn, or, srl, shiftNumToInverseNaN = sseOpcodeMinpd, sseOpcodeCmppd, sseOpcodeAndnpd, sseOpcodeOrpd, sseOpcodePsrlq, 0xd - } - - tmp1 := m.copyToTmp(rn.reg()) - tmp2 := m.copyToTmp(rm.reg()) - - // tmp1=min(rn, rm) - minIns1 := m.allocateInstr() - minIns1.asXmmRmR(min, rn, tmp2) - m.insert(minIns1) - - // tmp2=min(rm, rn) - minIns2 := m.allocateInstr() - minIns2.asXmmRmR(min, rm, tmp1) - m.insert(minIns2) - - // tmp3:=tmp1=min(rn, rm) - tmp3 := m.copyToTmp(tmp1) - - // tmp1 = -0 if (rn == -0 || rm == -0) && rn != NaN && rm !=NaN - // NaN if rn == NaN || rm == NaN - // min(rm, rm) otherwise - orIns := m.allocateInstr() - orIns.asXmmRmR(or, newOperandReg(tmp2), tmp1) - m.insert(orIns) - - // tmp3 is originally min(rn,rm). 
- // tmp3 = 0^ (set all bits) if rn == NaN || rm == NaN - // 0 otherwise - cmpIns := m.allocateInstr() - cmpIns.asXmmRmRImm(cmp, uint8(cmpPredUNORD_Q), newOperandReg(tmp2), tmp3) - m.insert(cmpIns) - - // tmp1 = -0 if (rn == -0 || rm == -0) && rn != NaN && rm !=NaN - // ^0 if rn == NaN || rm == NaN - // min(v1, v2) otherwise - orIns2 := m.allocateInstr() - orIns2.asXmmRmR(or, newOperandReg(tmp3), tmp1) - m.insert(orIns2) - - // tmp3 = set all bits on the mantissa bits - // 0 otherwise - shift := m.allocateInstr() - shift.asXmmRmiReg(srl, newOperandImm32(shiftNumToInverseNaN), tmp3) - m.insert(shift) - - // tmp3 = tmp1 and !tmp3 - // = -0 if (rn == -0 || rm == -0) && rn != NaN && rm !=NaN - // set all bits on exponential and sign bit (== NaN) if rn == NaN || rm == NaN - // min(rn, rm) otherwise - andnIns := m.allocateInstr() - andnIns.asXmmRmR(andn, newOperandReg(tmp1), tmp3) - m.insert(andnIns) - - m.copyTo(tmp3, rd) -} - -func (m *machine) lowerVFmax(instr *ssa.Instruction) { - x, y, lane := instr.Arg2WithLane() - rn := m.getOperand_Reg(m.c.ValueDefinition(x)) - rm := m.getOperand_Reg(m.c.ValueDefinition(y)) - rd := m.c.VRegOf(instr.Return()) - - var max, cmp, andn, or, xor, sub, srl /* shift right logical */ sseOpcode - var shiftNumToInverseNaN uint32 - if lane == ssa.VecLaneF32x4 { - max, cmp, andn, or, xor, sub, srl, shiftNumToInverseNaN = sseOpcodeMaxps, sseOpcodeCmpps, sseOpcodeAndnps, sseOpcodeOrps, sseOpcodeXorps, sseOpcodeSubps, sseOpcodePsrld, 0xa - } else { - max, cmp, andn, or, xor, sub, srl, shiftNumToInverseNaN = sseOpcodeMaxpd, sseOpcodeCmppd, sseOpcodeAndnpd, sseOpcodeOrpd, sseOpcodeXorpd, sseOpcodeSubpd, sseOpcodePsrlq, 0xd - } - - tmp0 := m.copyToTmp(rm.reg()) - tmp1 := m.copyToTmp(rn.reg()) - - // tmp0=max(rn, rm) - maxIns1 := m.allocateInstr() - maxIns1.asXmmRmR(max, rn, tmp0) - m.insert(maxIns1) - - // tmp1=max(rm, rn) - maxIns2 := m.allocateInstr() - maxIns2.asXmmRmR(max, rm, tmp1) - m.insert(maxIns2) - - // tmp2=max(rm, rn) - tmp2 := m.copyToTmp(tmp1) - - // tmp2 = -0 if (rn == -0 && rm == 0) || (rn == 0 && rm == -0) - // 0 if (rn == 0 && rm == 0) - // -0 if (rn == -0 && rm == -0) - // v1^v2 if rn == NaN || rm == NaN - // 0 otherwise - xorInstr := m.allocateInstr() - xorInstr.asXmmRmR(xor, newOperandReg(tmp0), tmp2) - m.insert(xorInstr) - // tmp1 = -0 if (rn == -0 && rm == 0) || (rn == 0 && rm == -0) - // 0 if (rn == 0 && rm == 0) - // -0 if (rn == -0 && rm == -0) - // NaN if rn == NaN || rm == NaN - // max(v1, v2) otherwise - orInstr := m.allocateInstr() - orInstr.asXmmRmR(or, newOperandReg(tmp2), tmp1) - m.insert(orInstr) - - tmp3 := m.copyToTmp(tmp1) - - // tmp3 = 0 if (rn == -0 && rm == 0) || (rn == 0 && rm == -0) || (rn == 0 && rm == 0) - // -0 if (rn == -0 && rm == -0) - // NaN if rn == NaN || rm == NaN - // max(v1, v2) otherwise - // - // Note: -0 - (-0) = 0 (!= -0) in floating point operation. 
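// ----- editor's aside: illustrative sketch, not part of the deleted file -----
// The reason for all of the fix-up above: a bare MINPS/MAXPS returns its
// second operand for NaN inputs and does not order -0 against +0, while
// Wasm's f32x4/f64x2 min and max must propagate NaN and treat -0 as less
// than +0. Scalar semantics in pure Go (hypothetical names, assuming
// `import "math"`):
func wasmFmin(a, b float64) float64 {
	if math.IsNaN(a) || math.IsNaN(b) {
		return math.NaN()
	}
	if a == 0 && b == 0 { // pick -0 if either operand is -0.
		if math.Signbit(a) {
			return a
		}
		return b
	}
	return math.Min(a, b)
}

func wasmFmax(a, b float64) float64 {
	if math.IsNaN(a) || math.IsNaN(b) {
		return math.NaN()
	}
	if a == 0 && b == 0 { // pick +0 if either operand is +0.
		if math.Signbit(a) {
			return b
		}
		return a
	}
	return math.Max(a, b)
}
// ----- end aside -----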
- subIns := m.allocateInstr() - subIns.asXmmRmR(sub, newOperandReg(tmp2), tmp3) - m.insert(subIns) - - // tmp1 = 0^ if rn == NaN || rm == NaN - cmpIns := m.allocateInstr() - cmpIns.asXmmRmRImm(cmp, uint8(cmpPredUNORD_Q), newOperandReg(tmp1), tmp1) - m.insert(cmpIns) - - // tmp1 = set all bits on the mantissa bits - // 0 otherwise - shift := m.allocateInstr() - shift.asXmmRmiReg(srl, newOperandImm32(shiftNumToInverseNaN), tmp1) - m.insert(shift) - - andnIns := m.allocateInstr() - andnIns.asXmmRmR(andn, newOperandReg(tmp3), tmp1) - m.insert(andnIns) - - m.copyTo(tmp1, rd) -} - -func (m *machine) lowerVFabs(instr *ssa.Instruction) { - x, lane := instr.ArgWithLane() - rm := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) - rd := m.c.VRegOf(instr.Return()) - - tmp := m.c.AllocateVReg(ssa.TypeV128) - - def := m.allocateInstr() - def.asDefineUninitializedReg(tmp) - m.insert(def) - - // Set all bits on tmp. - pcmp := m.allocateInstr() - pcmp.asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmp), tmp) - m.insert(pcmp) - - switch lane { - case ssa.VecLaneF32x4: - // Shift right packed single floats by 1 to clear the sign bits. - shift := m.allocateInstr() - shift.asXmmRmiReg(sseOpcodePsrld, newOperandImm32(1), tmp) - m.insert(shift) - // Clear the sign bit of rm. - andp := m.allocateInstr() - andp.asXmmRmR(sseOpcodeAndpd, rm, tmp) - m.insert(andp) - case ssa.VecLaneF64x2: - // Shift right packed single floats by 1 to clear the sign bits. - shift := m.allocateInstr() - shift.asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(1), tmp) - m.insert(shift) - // Clear the sign bit of rm. - andp := m.allocateInstr() - andp.asXmmRmR(sseOpcodeAndps, rm, tmp) - m.insert(andp) - } - - m.copyTo(tmp, rd) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go deleted file mode 100644 index e53729860..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go +++ /dev/null @@ -1,303 +0,0 @@ -package amd64 - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" -) - -// PostRegAlloc implements backend.Machine. -func (m *machine) PostRegAlloc() { - m.setupPrologue() - m.postRegAlloc() -} - -func (m *machine) setupPrologue() { - cur := m.rootInstr - prevInitInst := cur.next - - // At this point, we have the stack layout as follows: - // - // (high address) - // +-----------------+ <----- RBP (somewhere in the middle of the stack) - // | ....... | - // | ret Y | - // | ....... | - // | ret 0 | - // | arg X | - // | ....... | - // | arg 1 | - // | arg 0 | - // | Return Addr | - // RSP ----> +-----------------+ - // (low address) - - // First, we push the RBP, and update the RBP to the current RSP. - // - // (high address) (high address) - // RBP ----> +-----------------+ +-----------------+ - // | ....... | | ....... | - // | ret Y | | ret Y | - // | ....... | | ....... | - // | ret 0 | | ret 0 | - // | arg X | | arg X | - // | ....... | ====> | ....... 
| - // | arg 1 | | arg 1 | - // | arg 0 | | arg 0 | - // | Return Addr | | Return Addr | - // RSP ----> +-----------------+ | Caller_RBP | - // (low address) +-----------------+ <----- RSP, RBP - // - cur = m.setupRBPRSP(cur) - - if !m.stackBoundsCheckDisabled { - cur = m.insertStackBoundsCheck(m.requiredStackSize(), cur) - } - - // - // (high address) - // +-----------------+ +-----------------+ - // | ....... | | ....... | - // | ret Y | | ret Y | - // | ....... | | ....... | - // | ret 0 | | ret 0 | - // | arg X | | arg X | - // | ....... | | ....... | - // | arg 1 | | arg 1 | - // | arg 0 | | arg 0 | - // | xxxxx | | xxxxx | - // | Return Addr | | Return Addr | - // | Caller_RBP | ====> | Caller_RBP | - // RBP,RSP->+-----------------+ +-----------------+ <----- RBP - // (low address) | clobbered M | - // | clobbered 1 | - // | ........... | - // | clobbered 0 | - // +-----------------+ <----- RSP - // - if regs := m.clobberedRegs; len(regs) > 0 { - for i := range regs { - r := regs[len(regs)-1-i] // Reverse order. - if r.RegType() == regalloc.RegTypeInt { - cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r))) - } else { - // Push the XMM register is not supported by the PUSH instruction. - cur = m.addRSP(-16, cur) - push := m.allocateInstr().asXmmMovRM( - sseOpcodeMovdqu, r, newOperandMem(m.newAmodeImmReg(0, rspVReg)), - ) - cur = linkInstr(cur, push) - } - } - } - - if size := m.spillSlotSize; size > 0 { - // Simply decrease the RSP to allocate the spill slots. - // sub $size, %rsp - cur = linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(uint32(size)), rspVReg, true)) - - // At this point, we have the stack layout as follows: - // - // (high address) - // +-----------------+ - // | ....... | - // | ret Y | - // | ....... | - // | ret 0 | - // | arg X | - // | ....... | - // | arg 1 | - // | arg 0 | - // | ReturnAddress | - // | Caller_RBP | - // +-----------------+ <--- RBP - // | clobbered M | - // | ............ | - // | clobbered 1 | - // | clobbered 0 | - // | spill slot N | - // | ............ | - // | spill slot 0 | - // +-----------------+ <--- RSP - // (low address) - } - - linkInstr(cur, prevInitInst) -} - -// postRegAlloc does multiple things while walking through the instructions: -// 1. Inserts the epilogue code. -// 2. Removes the redundant copy instruction. -// 3. Inserts the dec/inc RSP instruction right before/after the call instruction. -// 4. Lowering that is supposed to be done after regalloc. 
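// ----- editor's aside: illustrative sketch, not part of the deleted file -----
// Item 2 above boils down to unlinking nodes from the doubly linked
// instruction list once a copy's source and destination resolve to the same
// real register. A simplified stand-in (types and names are hypothetical):
type instrNode struct {
	prev, next *instrNode
	selfCopy   bool // true when src and dst are the same real register.
}

func removeSelfCopies(root *instrNode) {
	for cur := root; cur != nil; cur = cur.next {
		if !cur.selfCopy {
			continue
		}
		if cur.prev != nil {
			cur.prev.next = cur.next // splice the redundant copy out of the list.
		}
		if cur.next != nil {
			cur.next.prev = cur.prev
		}
	}
}
// ----- end aside -----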
-func (m *machine) postRegAlloc() { - for cur := m.rootInstr; cur != nil; cur = cur.next { - switch k := cur.kind; k { - case ret: - m.setupEpilogueAfter(cur.prev) - continue - case fcvtToSintSequence, fcvtToUintSequence: - m.pendingInstructions = m.pendingInstructions[:0] - if k == fcvtToSintSequence { - m.lowerFcvtToSintSequenceAfterRegalloc(cur) - } else { - m.lowerFcvtToUintSequenceAfterRegalloc(cur) - } - prev := cur.prev - next := cur.next - cur := prev - for _, instr := range m.pendingInstructions { - cur = linkInstr(cur, instr) - } - linkInstr(cur, next) - continue - case xmmCMov: - m.pendingInstructions = m.pendingInstructions[:0] - m.lowerXmmCmovAfterRegAlloc(cur) - prev := cur.prev - next := cur.next - cur := prev - for _, instr := range m.pendingInstructions { - cur = linkInstr(cur, instr) - } - linkInstr(cur, next) - continue - case idivRemSequence: - m.pendingInstructions = m.pendingInstructions[:0] - m.lowerIDivRemSequenceAfterRegAlloc(cur) - prev := cur.prev - next := cur.next - cur := prev - for _, instr := range m.pendingInstructions { - cur = linkInstr(cur, instr) - } - linkInstr(cur, next) - continue - case call, callIndirect: - // At this point, reg alloc is done, therefore we can safely insert dec/inc RPS instruction - // right before/after the call instruction. If this is done before reg alloc, the stack slot - // can point to the wrong location and therefore results in a wrong value. - call := cur - next := call.next - _, _, _, _, size := backend.ABIInfoFromUint64(call.u2) - if size > 0 { - dec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(size), rspVReg, true) - linkInstr(call.prev, dec) - linkInstr(dec, call) - inc := m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(size), rspVReg, true) - linkInstr(call, inc) - linkInstr(inc, next) - } - continue - } - - // Removes the redundant copy instruction. - if cur.IsCopy() && cur.op1.reg().RealReg() == cur.op2.reg().RealReg() { - prev, next := cur.prev, cur.next - // Remove the copy instruction. - prev.next = next - if next != nil { - next.prev = prev - } - } - } -} - -func (m *machine) setupEpilogueAfter(cur *instruction) { - prevNext := cur.next - - // At this point, we have the stack layout as follows: - // - // (high address) - // +-----------------+ - // | ....... | - // | ret Y | - // | ....... | - // | ret 0 | - // | arg X | - // | ....... | - // | arg 1 | - // | arg 0 | - // | ReturnAddress | - // | Caller_RBP | - // +-----------------+ <--- RBP - // | clobbered M | - // | ............ | - // | clobbered 1 | - // | clobbered 0 | - // | spill slot N | - // | ............ | - // | spill slot 0 | - // +-----------------+ <--- RSP - // (low address) - - if size := m.spillSlotSize; size > 0 { - // Simply increase the RSP to free the spill slots. - // add $size, %rsp - cur = linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(uint32(size)), rspVReg, true)) - } - - // - // (high address) - // +-----------------+ +-----------------+ - // | ....... | | ....... | - // | ret Y | | ret Y | - // | ....... | | ....... | - // | ret 0 | | ret 0 | - // | arg X | | arg X | - // | ....... | | ....... | - // | arg 1 | | arg 1 | - // | arg 0 | | arg 0 | - // | ReturnAddress | | ReturnAddress | - // | Caller_RBP | | Caller_RBP | - // RBP ---> +-----------------+ ========> +-----------------+ <---- RSP, RBP - // | clobbered M | - // | ............ 
| - // | clobbered 1 | - // | clobbered 0 | - // RSP ---> +-----------------+ - // (low address) - // - if regs := m.clobberedRegs; len(regs) > 0 { - for _, r := range regs { - if r.RegType() == regalloc.RegTypeInt { - cur = linkInstr(cur, m.allocateInstr().asPop64(r)) - } else { - // Pop the XMM register is not supported by the POP instruction. - pop := m.allocateInstr().asXmmUnaryRmR( - sseOpcodeMovdqu, newOperandMem(m.newAmodeImmReg(0, rspVReg)), r, - ) - cur = linkInstr(cur, pop) - cur = m.addRSP(16, cur) - } - } - } - - // Now roll back the RSP to RBP, and pop the caller's RBP. - cur = m.revertRBPRSP(cur) - - linkInstr(cur, prevNext) -} - -func (m *machine) addRSP(offset int32, cur *instruction) *instruction { - if offset == 0 { - return cur - } - opcode := aluRmiROpcodeAdd - if offset < 0 { - opcode = aluRmiROpcodeSub - offset = -offset - } - return linkInstr(cur, m.allocateInstr().asAluRmiR(opcode, newOperandImm32(uint32(offset)), rspVReg, true)) -} - -func (m *machine) setupRBPRSP(cur *instruction) *instruction { - cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(rbpVReg))) - cur = linkInstr(cur, m.allocateInstr().asMovRR(rspVReg, rbpVReg, true)) - return cur -} - -func (m *machine) revertRBPRSP(cur *instruction) *instruction { - cur = linkInstr(cur, m.allocateInstr().asMovRR(rbpVReg, rspVReg, true)) - cur = linkInstr(cur, m.allocateInstr().asPop64(rbpVReg)) - return cur -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go deleted file mode 100644 index de9dcc944..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go +++ /dev/null @@ -1,352 +0,0 @@ -package amd64 - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -// regAllocFn implements regalloc.Function. -type regAllocFn struct { - ssaB ssa.Builder - m *machine - loopNestingForestRoots []ssa.BasicBlock - blockIter int -} - -// PostOrderBlockIteratorBegin implements regalloc.Function. -func (f *regAllocFn) PostOrderBlockIteratorBegin() *labelPosition { - f.blockIter = len(f.m.orderedSSABlockLabelPos) - 1 - return f.PostOrderBlockIteratorNext() -} - -// PostOrderBlockIteratorNext implements regalloc.Function. -func (f *regAllocFn) PostOrderBlockIteratorNext() *labelPosition { - if f.blockIter < 0 { - return nil - } - b := f.m.orderedSSABlockLabelPos[f.blockIter] - f.blockIter-- - return b -} - -// ReversePostOrderBlockIteratorBegin implements regalloc.Function. -func (f *regAllocFn) ReversePostOrderBlockIteratorBegin() *labelPosition { - f.blockIter = 0 - return f.ReversePostOrderBlockIteratorNext() -} - -// ReversePostOrderBlockIteratorNext implements regalloc.Function. -func (f *regAllocFn) ReversePostOrderBlockIteratorNext() *labelPosition { - if f.blockIter >= len(f.m.orderedSSABlockLabelPos) { - return nil - } - b := f.m.orderedSSABlockLabelPos[f.blockIter] - f.blockIter++ - return b -} - -// ClobberedRegisters implements regalloc.Function. -func (f *regAllocFn) ClobberedRegisters(regs []regalloc.VReg) { - f.m.clobberedRegs = append(f.m.clobberedRegs[:0], regs...) -} - -// LoopNestingForestRoots implements regalloc.Function. 
-func (f *regAllocFn) LoopNestingForestRoots() int { - f.loopNestingForestRoots = f.ssaB.LoopNestingForestRoots() - return len(f.loopNestingForestRoots) -} - -// LoopNestingForestRoot implements regalloc.Function. -func (f *regAllocFn) LoopNestingForestRoot(i int) *labelPosition { - root := f.loopNestingForestRoots[i] - pos := f.m.getOrAllocateSSABlockLabelPosition(root) - return pos -} - -// LowestCommonAncestor implements regalloc.Function. -func (f *regAllocFn) LowestCommonAncestor(blk1, blk2 *labelPosition) *labelPosition { - sb := f.ssaB.LowestCommonAncestor(blk1.sb, blk2.sb) - pos := f.m.getOrAllocateSSABlockLabelPosition(sb) - return pos -} - -// Idom implements regalloc.Function. -func (f *regAllocFn) Idom(blk *labelPosition) *labelPosition { - sb := f.ssaB.Idom(blk.sb) - pos := f.m.getOrAllocateSSABlockLabelPosition(sb) - return pos -} - -// SwapBefore implements regalloc.Function. -func (f *regAllocFn) SwapBefore(x1, x2, tmp regalloc.VReg, instr *instruction) { - f.m.swap(instr.prev, x1, x2, tmp) -} - -// StoreRegisterBefore implements regalloc.Function. -func (f *regAllocFn) StoreRegisterBefore(v regalloc.VReg, instr *instruction) { - m := f.m - m.insertStoreRegisterAt(v, instr, false) -} - -// StoreRegisterAfter implements regalloc.Function. -func (f *regAllocFn) StoreRegisterAfter(v regalloc.VReg, instr *instruction) { - m := f.m - m.insertStoreRegisterAt(v, instr, true) -} - -// ReloadRegisterBefore implements regalloc.Function. -func (f *regAllocFn) ReloadRegisterBefore(v regalloc.VReg, instr *instruction) { - m := f.m - m.insertReloadRegisterAt(v, instr, false) -} - -// ReloadRegisterAfter implements regalloc.Function. -func (f *regAllocFn) ReloadRegisterAfter(v regalloc.VReg, instr *instruction) { - m := f.m - m.insertReloadRegisterAt(v, instr, true) -} - -// InsertMoveBefore implements regalloc.Function. -func (f *regAllocFn) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { - f.m.insertMoveBefore(dst, src, instr) -} - -// LoopNestingForestChild implements regalloc.Function. -func (f *regAllocFn) LoopNestingForestChild(pos *labelPosition, i int) *labelPosition { - childSB := pos.sb.LoopNestingForestChildren()[i] - return f.m.getOrAllocateSSABlockLabelPosition(childSB) -} - -// Succ implements regalloc.Block. -func (f *regAllocFn) Succ(pos *labelPosition, i int) *labelPosition { - succSB := pos.sb.Succ(i) - if succSB.ReturnBlock() { - return nil - } - return f.m.getOrAllocateSSABlockLabelPosition(succSB) -} - -// Pred implements regalloc.Block. -func (f *regAllocFn) Pred(pos *labelPosition, i int) *labelPosition { - predSB := pos.sb.Pred(i) - return f.m.getOrAllocateSSABlockLabelPosition(predSB) -} - -// BlockParams implements regalloc.Function. -func (f *regAllocFn) BlockParams(pos *labelPosition, regs *[]regalloc.VReg) []regalloc.VReg { - c := f.m.c - *regs = (*regs)[:0] - for i := 0; i < pos.sb.Params(); i++ { - v := c.VRegOf(pos.sb.Param(i)) - *regs = append(*regs, v) - } - return *regs -} - -// ID implements regalloc.Block. -func (pos *labelPosition) ID() int32 { - return int32(pos.sb.ID()) -} - -// InstrIteratorBegin implements regalloc.Block. -func (pos *labelPosition) InstrIteratorBegin() *instruction { - ret := pos.begin - pos.cur = ret - return ret -} - -// InstrIteratorNext implements regalloc.Block. 
-func (pos *labelPosition) InstrIteratorNext() *instruction { - for { - if pos.cur == pos.end { - return nil - } - instr := pos.cur.next - pos.cur = instr - if instr == nil { - return nil - } else if instr.addedBeforeRegAlloc { - // Only concerned about the instruction added before regalloc. - return instr - } - } -} - -// InstrRevIteratorBegin implements regalloc.Block. -func (pos *labelPosition) InstrRevIteratorBegin() *instruction { - pos.cur = pos.end - return pos.cur -} - -// InstrRevIteratorNext implements regalloc.Block. -func (pos *labelPosition) InstrRevIteratorNext() *instruction { - for { - if pos.cur == pos.begin { - return nil - } - instr := pos.cur.prev - pos.cur = instr - if instr == nil { - return nil - } else if instr.addedBeforeRegAlloc { - // Only concerned about the instruction added before regalloc. - return instr - } - } -} - -// FirstInstr implements regalloc.Block. -func (pos *labelPosition) FirstInstr() *instruction { return pos.begin } - -// LastInstrForInsertion implements regalloc.Block. -func (pos *labelPosition) LastInstrForInsertion() *instruction { - return lastInstrForInsertion(pos.begin, pos.end) -} - -// Preds implements regalloc.Block. -func (pos *labelPosition) Preds() int { return pos.sb.Preds() } - -// Entry implements regalloc.Block. -func (pos *labelPosition) Entry() bool { return pos.sb.EntryBlock() } - -// Succs implements regalloc.Block. -func (pos *labelPosition) Succs() int { return pos.sb.Succs() } - -// LoopHeader implements regalloc.Block. -func (pos *labelPosition) LoopHeader() bool { return pos.sb.LoopHeader() } - -// LoopNestingForestChildren implements regalloc.Block. -func (pos *labelPosition) LoopNestingForestChildren() int { - return len(pos.sb.LoopNestingForestChildren()) -} - -func (m *machine) insertMoveBefore(dst, src regalloc.VReg, instr *instruction) { - typ := src.RegType() - if typ != dst.RegType() { - panic("BUG: src and dst must have the same type") - } - - mov := m.allocateInstr() - if typ == regalloc.RegTypeInt { - mov.asMovRR(src, dst, true) - } else { - mov.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandReg(src), dst) - } - - cur := instr.prev - prevNext := cur.next - cur = linkInstr(cur, mov) - linkInstr(cur, prevNext) -} - -func (m *machine) insertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { - if !v.IsRealReg() { - panic("BUG: VReg must be backed by real reg to be stored") - } - - typ := m.c.TypeOf(v) - - var prevNext, cur *instruction - if after { - cur, prevNext = instr, instr.next - } else { - cur, prevNext = instr.prev, instr - } - - offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size()) - store := m.allocateInstr() - mem := newOperandMem(m.newAmodeImmReg(uint32(offsetFromSP), rspVReg)) - switch typ { - case ssa.TypeI32: - store.asMovRM(v, mem, 4) - case ssa.TypeI64: - store.asMovRM(v, mem, 8) - case ssa.TypeF32: - store.asXmmMovRM(sseOpcodeMovss, v, mem) - case ssa.TypeF64: - store.asXmmMovRM(sseOpcodeMovsd, v, mem) - case ssa.TypeV128: - store.asXmmMovRM(sseOpcodeMovdqu, v, mem) - } - - cur = linkInstr(cur, store) - return linkInstr(cur, prevNext) -} - -func (m *machine) insertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { - if !v.IsRealReg() { - panic("BUG: VReg must be backed by real reg to be stored") - } - - typ := m.c.TypeOf(v) - var prevNext, cur *instruction - if after { - cur, prevNext = instr, instr.next - } else { - cur, prevNext = instr.prev, instr - } - - // Load the value to the temporary. 
- load := m.allocateInstr() - offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size()) - a := newOperandMem(m.newAmodeImmReg(uint32(offsetFromSP), rspVReg)) - switch typ { - case ssa.TypeI32: - load.asMovzxRmR(extModeLQ, a, v) - case ssa.TypeI64: - load.asMov64MR(a, v) - case ssa.TypeF32: - load.asXmmUnaryRmR(sseOpcodeMovss, a, v) - case ssa.TypeF64: - load.asXmmUnaryRmR(sseOpcodeMovsd, a, v) - case ssa.TypeV128: - load.asXmmUnaryRmR(sseOpcodeMovdqu, a, v) - default: - panic("BUG") - } - - cur = linkInstr(cur, load) - return linkInstr(cur, prevNext) -} - -func (m *machine) swap(cur *instruction, x1, x2, tmp regalloc.VReg) { - if x1.RegType() == regalloc.RegTypeInt { - prevNext := cur.next - xc := m.allocateInstr().asXCHG(x1, newOperandReg(x2), 8) - cur = linkInstr(cur, xc) - linkInstr(cur, prevNext) - } else { - if tmp.Valid() { - prevNext := cur.next - m.insertMoveBefore(tmp, x1, prevNext) - m.insertMoveBefore(x1, x2, prevNext) - m.insertMoveBefore(x2, tmp, prevNext) - } else { - prevNext := cur.next - r2 := x2.RealReg() - // Temporarily spill x1 to stack. - cur = m.insertStoreRegisterAt(x1, cur, true).prev - // Then move x2 to x1. - cur = linkInstr(cur, m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqa, newOperandReg(x2), x1)) - linkInstr(cur, prevNext) - // Then reload the original value on x1 from stack to r2. - m.insertReloadRegisterAt(x1.SetRealReg(r2), cur, true) - } - } -} - -func lastInstrForInsertion(begin, end *instruction) *instruction { - cur := end - for cur.kind == nop0 { - cur = cur.prev - if cur == begin { - return end - } - } - switch cur.kind { - case jmp: - return cur - default: - return end - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go deleted file mode 100644 index 8d514d857..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go +++ /dev/null @@ -1,992 +0,0 @@ -package amd64 - -import ( - "fmt" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -var swizzleMask = [16]byte{ - 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, - 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, -} - -func (m *machine) lowerSwizzle(x, y ssa.Value, ret ssa.Value) { - masklabel := m.getOrAllocateConstLabel(&m.constSwizzleMaskConstIndex, swizzleMask[:]) - - // Load mask to maskReg. - maskReg := m.c.AllocateVReg(ssa.TypeV128) - loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(masklabel)), maskReg) - m.insert(loadMask) - - // Copy x and y to tmp registers. - xx := m.getOperand_Reg(m.c.ValueDefinition(x)) - tmpDst := m.copyToTmp(xx.reg()) - yy := m.getOperand_Reg(m.c.ValueDefinition(y)) - tmpX := m.copyToTmp(yy.reg()) - - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddusb, newOperandReg(maskReg), tmpX)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmpX), tmpDst)) - - // Copy the result to the destination register. - m.copyTo(tmpDst, m.c.VRegOf(ret)) -} - -func (m *machine) lowerInsertLane(x, y ssa.Value, index byte, ret ssa.Value, lane ssa.VecLane) { - // Copy x to tmp. 
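// ----- editor's aside: illustrative sketch, not part of the deleted file -----
// Why lowerSwizzle above adds the 0x70 mask with PADDUSB before PSHUFB: any
// selector >= 16 saturates to 0x80 or higher, and PSHUFB turns a mask byte
// with bit 7 set into a zero lane, which is exactly i8x16.swizzle's
// out-of-range behaviour; in-range selectors keep their low four bits. A
// pure-Go sketch (hypothetical name):
func swizzle(v, s [16]byte) (out [16]byte) {
	for i, sel := range s {
		biased := sel + 0x70
		if biased < sel { // unsigned overflow: PADDUSB saturates to 0xff instead.
			biased = 0xff
		}
		if biased&0x80 == 0 {
			out[i] = v[biased&0x0f] // in range: select the lane.
		} // otherwise the output lane stays 0.
	}
	return
}
// ----- end aside -----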
- tmpDst := m.c.AllocateVReg(ssa.TypeV128) - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, m.getOperand_Mem_Reg(m.c.ValueDefinition(x)), tmpDst)) - - yy := m.getOperand_Reg(m.c.ValueDefinition(y)) - switch lane { - case ssa.VecLaneI8x16: - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, index, yy, tmpDst)) - case ssa.VecLaneI16x8: - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, index, yy, tmpDst)) - case ssa.VecLaneI32x4: - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, index, yy, tmpDst)) - case ssa.VecLaneI64x2: - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, index, yy, tmpDst)) - case ssa.VecLaneF32x4: - // In INSERTPS instruction, the destination index is encoded at 4 and 5 bits of the argument. - // See https://www.felixcloutier.com/x86/insertps - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeInsertps, index<<4, yy, tmpDst)) - case ssa.VecLaneF64x2: - if index == 0 { - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, yy, tmpDst)) - } else { - m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMovlhps, yy, tmpDst)) - } - default: - panic(fmt.Sprintf("invalid lane type: %s", lane)) - } - - m.copyTo(tmpDst, m.c.VRegOf(ret)) -} - -func (m *machine) lowerExtractLane(x ssa.Value, index byte, signed bool, ret ssa.Value, lane ssa.VecLane) { - // Pextr variants are used to extract a lane from a vector register. - xx := m.getOperand_Reg(m.c.ValueDefinition(x)) - - tmpDst := m.c.AllocateVReg(ret.Type()) - m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst)) - switch lane { - case ssa.VecLaneI8x16: - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrb, index, xx, tmpDst)) - if signed { - m.insert(m.allocateInstr().asMovsxRmR(extModeBL, newOperandReg(tmpDst), tmpDst)) - } else { - m.insert(m.allocateInstr().asMovzxRmR(extModeBL, newOperandReg(tmpDst), tmpDst)) - } - case ssa.VecLaneI16x8: - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrw, index, xx, tmpDst)) - if signed { - m.insert(m.allocateInstr().asMovsxRmR(extModeWL, newOperandReg(tmpDst), tmpDst)) - } else { - m.insert(m.allocateInstr().asMovzxRmR(extModeWL, newOperandReg(tmpDst), tmpDst)) - } - case ssa.VecLaneI32x4: - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrd, index, xx, tmpDst)) - case ssa.VecLaneI64x2: - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, index, xx, tmpDst)) - case ssa.VecLaneF32x4: - if index == 0 { - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovss, xx, tmpDst)) - } else { - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, index, xx, tmpDst)) - } - case ssa.VecLaneF64x2: - if index == 0 { - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, xx, tmpDst)) - } else { - m.copyTo(xx.reg(), tmpDst) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0b00_00_11_10, newOperandReg(tmpDst), tmpDst)) - } - default: - panic(fmt.Sprintf("invalid lane type: %s", lane)) - } - - m.copyTo(tmpDst, m.c.VRegOf(ret)) -} - -var sqmulRoundSat = [16]byte{ - 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, - 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, -} - -func (m *machine) lowerSqmulRoundSat(x, y, ret ssa.Value) { - // See https://github.com/WebAssembly/simd/pull/365 for the following logic. 
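// ----- editor's aside: illustrative sketch, not part of the deleted file -----
// Scalar semantics of i16x8.q15mulr_sat_s as built from PMULHRSW here: the
// instruction already computes (a*b + 0x4000) >> 15, and the PCMPEQW/PXOR
// pair below only patches the single overflowing input, -32768 * -32768,
// which must saturate to 32767. Hypothetical helper name:
func q15MulRSatS(a, b int16) int16 {
	p := (int32(a)*int32(b) + 0x4000) >> 15
	if p > 32767 { // only reachable when a == b == -32768.
		return 32767
	}
	return int16(p)
}
// ----- end aside -----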
- maskLabel := m.getOrAllocateConstLabel(&m.constSqmulRoundSatIndex, sqmulRoundSat[:]) - - tmp := m.c.AllocateVReg(ssa.TypeV128) - loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), tmp) - m.insert(loadMask) - - xx, yy := m.getOperand_Reg(m.c.ValueDefinition(x)), m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) - tmpX := m.copyToTmp(xx.reg()) - - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmulhrsw, yy, tmpX)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqw, newOperandReg(tmpX), tmp)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmpX)) - - m.copyTo(tmpX, m.c.VRegOf(ret)) -} - -func (m *machine) lowerVUshr(x, y, ret ssa.Value, lane ssa.VecLane) { - switch lane { - case ssa.VecLaneI8x16: - m.lowerVUshri8x16(x, y, ret) - case ssa.VecLaneI16x8, ssa.VecLaneI32x4, ssa.VecLaneI64x2: - m.lowerShr(x, y, ret, lane, false) - default: - panic(fmt.Sprintf("invalid lane type: %s", lane)) - } -} - -// i8x16LogicalSHRMaskTable is necessary for emulating non-existent packed bytes logical right shifts on amd64. -// The mask is applied after performing packed word shifts on the value to clear out the unnecessary bits. -var i8x16LogicalSHRMaskTable = [8 * 16]byte{ // (the number of possible shift amount 0, 1, ..., 7.) * 16 bytes. - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // for 0 shift - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // for 1 shift - 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, // for 2 shift - 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, // for 3 shift - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, // for 4 shift - 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, // for 5 shift - 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, // for 6 shift - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // for 7 shift -} - -func (m *machine) lowerVUshri8x16(x, y, ret ssa.Value) { - tmpGpReg := m.c.AllocateVReg(ssa.TypeI32) - // Load the modulo 8 mask to tmpReg. - m.lowerIconst(tmpGpReg, 0x7, false) - // Take the modulo 8 of the shift amount. - shiftAmt := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)) - m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, shiftAmt, tmpGpReg, false)) - - _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) - xx := m.copyToTmp(_xx.reg()) - - vecTmp := m.c.AllocateVReg(ssa.TypeV128) - m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), vecTmp, false)) - m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrlw, newOperandReg(vecTmp), xx)) - - maskTableLabel := m.getOrAllocateConstLabel(&m.constI8x16LogicalSHRMaskTableIndex, i8x16LogicalSHRMaskTable[:]) - base := m.c.AllocateVReg(ssa.TypeI64) - lea := m.allocateInstr().asLEA(newOperandLabel(maskTableLabel), base) - m.insert(lea) - - // Shift tmpGpReg by 4 to multiply the shift amount by 16. 
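// ----- editor's aside: illustrative sketch, not part of the deleted file -----
// What this emulation computes, in pure Go: x86 has no per-byte logical
// shift, so the value is shifted as 16-bit lanes (PSRLW) and then ANDed with
// the (0xff >> s) pattern from i8x16LogicalSHRMaskTable to clear the bits
// that leaked in from each byte's neighbour. Hypothetical helper name:
func ushrI8x16(v [16]byte, s uint) (out [16]byte) {
	s &= 7 // the shift amount is taken modulo 8, as above.
	mask := byte(0xff) >> s
	for i := 0; i < 16; i += 2 {
		w := uint16(v[i]) | uint16(v[i+1])<<8 // one little-endian 16-bit lane.
		w >>= s                               // the PSRLW step.
		out[i] = byte(w) & mask               // drop the bits shifted in from v[i+1].
		out[i+1] = byte(w>>8) & mask          // the high byte is already clean; the mask is harmless.
	}
	return
}
// ----- end aside -----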
- m.insert(m.allocateInstr().asShiftR(shiftROpShiftLeft, newOperandImm32(4), tmpGpReg, false)) - - mem := m.newAmodeRegRegShift(0, base, tmpGpReg, 0) - loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(mem), vecTmp) - m.insert(loadMask) - - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(vecTmp), xx)) - m.copyTo(xx, m.c.VRegOf(ret)) -} - -func (m *machine) lowerVSshr(x, y, ret ssa.Value, lane ssa.VecLane) { - switch lane { - case ssa.VecLaneI8x16: - m.lowerVSshri8x16(x, y, ret) - case ssa.VecLaneI16x8, ssa.VecLaneI32x4: - m.lowerShr(x, y, ret, lane, true) - case ssa.VecLaneI64x2: - m.lowerVSshri64x2(x, y, ret) - default: - panic(fmt.Sprintf("invalid lane type: %s", lane)) - } -} - -func (m *machine) lowerVSshri8x16(x, y, ret ssa.Value) { - shiftAmtReg := m.c.AllocateVReg(ssa.TypeI32) - // Load the modulo 8 mask to tmpReg. - m.lowerIconst(shiftAmtReg, 0x7, false) - // Take the modulo 8 of the shift amount. - shiftAmt := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)) - m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, shiftAmt, shiftAmtReg, false)) - - // Copy the x value to two temporary registers. - _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) - xx := m.copyToTmp(_xx.reg()) - vecTmp := m.c.AllocateVReg(ssa.TypeV128) - m.copyTo(xx, vecTmp) - - // Assuming that we have - // xx = [b1, ..., b16] - // vecTmp = [b1, ..., b16] - // at this point, then we use PUNPCKLBW and PUNPCKHBW to produce: - // xx = [b1, b1, b2, b2, ..., b8, b8] - // vecTmp = [b9, b9, b10, b10, ..., b16, b16] - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePunpcklbw, newOperandReg(xx), xx)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePunpckhbw, newOperandReg(vecTmp), vecTmp)) - - // Adding 8 to the shift amount, and then move the amount to vecTmp2. - vecTmp2 := m.c.AllocateVReg(ssa.TypeV128) - m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(8), shiftAmtReg, false)) - m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(shiftAmtReg), vecTmp2, false)) - - // Perform the word packed arithmetic right shifts on vreg and vecTmp. - // This changes these two registers as: - // xx = [xxx, b1 >> s, xxx, b2 >> s, ..., xxx, b8 >> s] - // vecTmp = [xxx, b9 >> s, xxx, b10 >> s, ..., xxx, b16 >> s] - // where xxx is 1 or 0 depending on each byte's sign, and ">>" is the arithmetic shift on a byte. - m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsraw, newOperandReg(vecTmp2), xx)) - m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsraw, newOperandReg(vecTmp2), vecTmp)) - - // Finally, we can get the result by packing these two word vectors. - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePacksswb, newOperandReg(vecTmp), xx)) - - m.copyTo(xx, m.c.VRegOf(ret)) -} - -func (m *machine) lowerVSshri64x2(x, y, ret ssa.Value) { - // Load the shift amount to RCX. 
- shiftAmt := m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) - m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, shiftAmt, rcxVReg)) - - tmpGp := m.c.AllocateVReg(ssa.TypeI64) - - _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) - xxReg := m.copyToTmp(_xx.reg()) - - m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, 0, newOperandReg(xxReg), tmpGp)) - m.insert(m.allocateInstr().asShiftR(shiftROpShiftRightArithmetic, newOperandReg(rcxVReg), tmpGp, true)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, newOperandReg(tmpGp), xxReg)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, 1, newOperandReg(xxReg), tmpGp)) - m.insert(m.allocateInstr().asShiftR(shiftROpShiftRightArithmetic, newOperandReg(rcxVReg), tmpGp, true)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, newOperandReg(tmpGp), xxReg)) - - m.copyTo(xxReg, m.c.VRegOf(ret)) -} - -func (m *machine) lowerShr(x, y, ret ssa.Value, lane ssa.VecLane, signed bool) { - var modulo uint64 - var shiftOp sseOpcode - switch lane { - case ssa.VecLaneI16x8: - modulo = 0xf - if signed { - shiftOp = sseOpcodePsraw - } else { - shiftOp = sseOpcodePsrlw - } - case ssa.VecLaneI32x4: - modulo = 0x1f - if signed { - shiftOp = sseOpcodePsrad - } else { - shiftOp = sseOpcodePsrld - } - case ssa.VecLaneI64x2: - modulo = 0x3f - if signed { - panic("BUG") - } - shiftOp = sseOpcodePsrlq - default: - panic(fmt.Sprintf("invalid lane type: %s", lane)) - } - - _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) - xx := m.copyToTmp(_xx.reg()) - - tmpGpReg := m.c.AllocateVReg(ssa.TypeI32) - // Load the modulo 8 mask to tmpReg. - m.lowerIconst(tmpGpReg, modulo, false) - // Take the modulo 8 of the shift amount. - m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, - m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)), tmpGpReg, false)) - // And move it to a xmm register. - tmpVec := m.c.AllocateVReg(ssa.TypeV128) - m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), tmpVec, false)) - - // Then do the actual shift. - m.insert(m.allocateInstr().asXmmRmiReg(shiftOp, newOperandReg(tmpVec), xx)) - - m.copyTo(xx, m.c.VRegOf(ret)) -} - -func (m *machine) lowerVIshl(x, y, ret ssa.Value, lane ssa.VecLane) { - var modulo uint64 - var shiftOp sseOpcode - var isI8x16 bool - switch lane { - case ssa.VecLaneI8x16: - isI8x16 = true - modulo = 0x7 - shiftOp = sseOpcodePsllw - case ssa.VecLaneI16x8: - modulo = 0xf - shiftOp = sseOpcodePsllw - case ssa.VecLaneI32x4: - modulo = 0x1f - shiftOp = sseOpcodePslld - case ssa.VecLaneI64x2: - modulo = 0x3f - shiftOp = sseOpcodePsllq - default: - panic(fmt.Sprintf("invalid lane type: %s", lane)) - } - - _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) - xx := m.copyToTmp(_xx.reg()) - - tmpGpReg := m.c.AllocateVReg(ssa.TypeI32) - // Load the modulo 8 mask to tmpReg. - m.lowerIconst(tmpGpReg, modulo, false) - // Take the modulo 8 of the shift amount. - m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, - m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)), tmpGpReg, false)) - // And move it to a xmm register. - tmpVec := m.c.AllocateVReg(ssa.TypeV128) - m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), tmpVec, false)) - - // Then do the actual shift. 
- m.insert(m.allocateInstr().asXmmRmiReg(shiftOp, newOperandReg(tmpVec), xx)) - - if isI8x16 { - maskTableLabel := m.getOrAllocateConstLabel(&m.constI8x16SHLMaskTableIndex, i8x16SHLMaskTable[:]) - base := m.c.AllocateVReg(ssa.TypeI64) - lea := m.allocateInstr().asLEA(newOperandLabel(maskTableLabel), base) - m.insert(lea) - - // Shift tmpGpReg by 4 to multiply the shift amount by 16. - m.insert(m.allocateInstr().asShiftR(shiftROpShiftLeft, newOperandImm32(4), tmpGpReg, false)) - - mem := m.newAmodeRegRegShift(0, base, tmpGpReg, 0) - loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(mem), tmpVec) - m.insert(loadMask) - - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(tmpVec), xx)) - } - - m.copyTo(xx, m.c.VRegOf(ret)) -} - -// i8x16SHLMaskTable is necessary for emulating non-existent packed bytes left shifts on amd64. -// The mask is applied after performing packed word shifts on the value to clear out the unnecessary bits. -var i8x16SHLMaskTable = [8 * 16]byte{ // (the number of possible shift amount 0, 1, ..., 7.) * 16 bytes. - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // for 0 shift - 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, // for 1 shift - 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, // for 2 shift - 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, // for 3 shift - 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // for 4 shift - 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, // for 5 shift - 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, // for 6 shift - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, // for 7 shift -} - -func (m *machine) lowerVRound(x, ret ssa.Value, imm byte, _64 bool) { - xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) - var round sseOpcode - if _64 { - round = sseOpcodeRoundpd - } else { - round = sseOpcodeRoundps - } - m.insert(m.allocateInstr().asXmmUnaryRmRImm(round, imm, xx, m.c.VRegOf(ret))) -} - -var ( - allOnesI8x16 = [16]byte{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1} - allOnesI16x8 = [16]byte{0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0} - extAddPairwiseI16x8uMask1 = [16]byte{0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80} - extAddPairwiseI16x8uMask2 = [16]byte{0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00} -) - -func (m *machine) lowerExtIaddPairwise(x, ret ssa.Value, srcLane ssa.VecLane, signed bool) { - _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) - xx := m.copyToTmp(_xx.reg()) - switch srcLane { - case ssa.VecLaneI8x16: - allOneReg := m.c.AllocateVReg(ssa.TypeV128) - mask := m.getOrAllocateConstLabel(&m.constAllOnesI8x16Index, allOnesI8x16[:]) - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), allOneReg)) - - var resultReg regalloc.VReg - if signed { - resultReg = allOneReg - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddubsw, newOperandReg(xx), resultReg)) - } else { - // Interpreter tmp (all ones) as signed byte meaning that all the multiply-add is unsigned. 
- resultReg = xx
- m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddubsw, newOperandReg(allOneReg), resultReg))
- }
- m.copyTo(resultReg, m.c.VRegOf(ret))
-
- case ssa.VecLaneI16x8:
- if signed {
- allOnesReg := m.c.AllocateVReg(ssa.TypeV128)
- mask := m.getOrAllocateConstLabel(&m.constAllOnesI16x8Index, allOnesI16x8[:])
- m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), allOnesReg))
- m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, newOperandReg(allOnesReg), xx))
- m.copyTo(xx, m.c.VRegOf(ret))
- } else {
- maskReg := m.c.AllocateVReg(ssa.TypeV128)
- mask := m.getOrAllocateConstLabel(&m.constExtAddPairwiseI16x8uMask1Index, extAddPairwiseI16x8uMask1[:])
- m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
-
- // Flip the sign bit of each 16-bit lane of xx by XOR-ing it with 0x8000.
- //
- // Assuming that xx = [w1, ..., w8], now we have,
- // xx[i] = int16(wi - 0x8000) for i = 1...8
- m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(maskReg), xx))
-
- mask = m.getOrAllocateConstLabel(&m.constAllOnesI16x8Index, allOnesI16x8[:])
- m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
-
- // For i = 0,...,3 (as this results in i32x4 lanes), now we have
- // xx[i] = int32((wn - 0x8000) + (w(n+1) - 0x8000)) = int32(wn + w(n+1)) - 0x10000
- // c.assembler.CompileRegisterToRegister(amd64.PMADDWD, tmp, vr)
- m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, newOperandReg(maskReg), xx))
-
- mask = m.getOrAllocateConstLabel(&m.constExtAddPairwiseI16x8uMask2Index, extAddPairwiseI16x8uMask2[:])
- m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
-
- // vr[i] = int32(wn + w(n+1)) - 0x10000 + 0x10000 = int32(wn + w(n+1)) = uint32(wn + w(n+1)). 
- // c.assembler.CompileRegisterToRegister(amd64.PADDD, tmp, vr) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddd, newOperandReg(maskReg), xx)) - - m.copyTo(xx, m.c.VRegOf(ret)) - } - default: - panic(fmt.Sprintf("invalid lane type: %s", srcLane)) - } -} - -func (m *machine) lowerWidenLow(x, ret ssa.Value, lane ssa.VecLane, signed bool) { - var sseOp sseOpcode - switch lane { - case ssa.VecLaneI8x16: - if signed { - sseOp = sseOpcodePmovsxbw - } else { - sseOp = sseOpcodePmovzxbw - } - case ssa.VecLaneI16x8: - if signed { - sseOp = sseOpcodePmovsxwd - } else { - sseOp = sseOpcodePmovzxwd - } - case ssa.VecLaneI32x4: - if signed { - sseOp = sseOpcodePmovsxdq - } else { - sseOp = sseOpcodePmovzxdq - } - default: - panic(fmt.Sprintf("invalid lane type: %s", lane)) - } - - xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x)) - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOp, xx, m.c.VRegOf(ret))) -} - -func (m *machine) lowerWidenHigh(x, ret ssa.Value, lane ssa.VecLane, signed bool) { - tmp := m.c.AllocateVReg(ssa.TypeV128) - xx := m.getOperand_Reg(m.c.ValueDefinition(x)) - m.copyTo(xx.reg(), tmp) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePalignr, 8, newOperandReg(tmp), tmp)) - - var sseOp sseOpcode - switch lane { - case ssa.VecLaneI8x16: - if signed { - sseOp = sseOpcodePmovsxbw - } else { - sseOp = sseOpcodePmovzxbw - } - case ssa.VecLaneI16x8: - if signed { - sseOp = sseOpcodePmovsxwd - } else { - sseOp = sseOpcodePmovzxwd - } - case ssa.VecLaneI32x4: - if signed { - sseOp = sseOpcodePmovsxdq - } else { - sseOp = sseOpcodePmovzxdq - } - default: - panic(fmt.Sprintf("invalid lane type: %s", lane)) - } - - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOp, newOperandReg(tmp), m.c.VRegOf(ret))) -} - -func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, ret ssa.Value, lane ssa.VecLane) { - tmpDst, tmpGp := m.c.AllocateVReg(ssa.TypeV128), m.c.AllocateVReg(ssa.TypeI64) - am := newOperandMem(m.lowerToAddressMode(ptr, offset)) - - m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst)) - switch lane { - case ssa.VecLaneI8x16: - m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, am, tmpGp)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, 0, newOperandReg(tmpGp), tmpDst)) - tmpZeroVec := m.c.AllocateVReg(ssa.TypeV128) - m.insert(m.allocateInstr().asZeros(tmpZeroVec)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmpZeroVec), tmpDst)) - case ssa.VecLaneI16x8: - m.insert(m.allocateInstr().asMovzxRmR(extModeWQ, am, tmpGp)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 0, newOperandReg(tmpGp), tmpDst)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 1, newOperandReg(tmpGp), tmpDst)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst)) - case ssa.VecLaneI32x4: - m.insert(m.allocateInstr().asMovzxRmR(extModeLQ, am, tmpGp)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, 0, newOperandReg(tmpGp), tmpDst)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst)) - case ssa.VecLaneI64x2: - m.insert(m.allocateInstr().asMov64MR(am, tmpGp)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, newOperandReg(tmpGp), tmpDst)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, newOperandReg(tmpGp), tmpDst)) - default: - panic(fmt.Sprintf("invalid lane type: %s", lane)) - } - - m.copyTo(tmpDst, m.c.VRegOf(ret)) -} - -var f64x2CvtFromIMask = [16]byte{ - 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00,
-}
-
-func (m *machine) lowerVFcvtFromInt(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
- switch lane {
- case ssa.VecLaneF32x4:
- if signed {
- xx := m.getOperand_Reg(m.c.ValueDefinition(x))
- m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, xx, m.c.VRegOf(ret)))
- } else {
- xx := m.getOperand_Reg(m.c.ValueDefinition(x))
- // Copy the value to two temporary registers.
- tmp := m.copyToTmp(xx.reg())
- tmp2 := m.copyToTmp(xx.reg())
-
- // Keep only the lower 22 bits of each 32-bit element (left then logical right shift by 10); these convert to float32 exactly.
- m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePslld, newOperandImm32(0xa), tmp))
- m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(0xa), tmp))
-
- // Subtract the lower 22 bits from tmp2, so tmp2 keeps only the upper 10 bits of each element.
- m.insert(m.allocateInstr().asXmmRmR(sseOpcodePsubd, newOperandReg(tmp), tmp2))
-
- // Convert the lower 22 bits in tmp.
- m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp), tmp))
-
- // Logically shift tmp2 right by one so each element is non-negative as a signed int32, then convert: tmp2 now holds half of the upper-bits contribution.
- m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(1), tmp2))
- m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp2), tmp2))
-
- // Double the converted, halved upper-bits contribution.
- m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddps, newOperandReg(tmp2), tmp2))
-
- // Get the conversion result by adding tmp (the lower-bits conversion) into tmp2.
- m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddps, newOperandReg(tmp), tmp2))
-
- m.copyTo(tmp2, m.c.VRegOf(ret))
- }
- case ssa.VecLaneF64x2:
- if signed {
- xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
- m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2pd, xx, m.c.VRegOf(ret)))
- } else {
- maskReg := m.c.AllocateVReg(ssa.TypeV128)
- maskLabel := m.getOrAllocateConstLabel(&m.constF64x2CvtFromIMaskIndex, f64x2CvtFromIMask[:])
- // maskReg = [0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
- m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), maskReg))
-
- _xx := m.getOperand_Reg(m.c.ValueDefinition(x))
- xx := m.copyToTmp(_xx.reg())
-
- // Given that we have xx = [d1, d2, d3, d4], this results in
- // xx = [d1, [0x00, 0x00, 0x30, 0x43], d2, [0x00, 0x00, 0x30, 0x43]]
- // = [float64(uint32(d1)) + 0x1.0p52, float64(uint32(d2)) + 0x1.0p52]
- // ^See https://stackoverflow.com/questions/13269523/can-all-32-bit-ints-be-exactly-represented-as-a-double
- m.insert(m.allocateInstr().asXmmRmR(sseOpcodeUnpcklps, newOperandReg(maskReg), xx))
-
- // maskReg = [float64(0x1.0p52), float64(0x1.0p52)]
- maskLabel = m.getOrAllocateConstLabel(&m.constTwop52Index, twop52[:])
- m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), maskReg))
-
- // Now, we get the result as
- // xx = [float64(uint32(d1)), float64(uint32(d2))]
- // because the following equality always holds:
- // float64(0x1.0p52 + float64(uint32(x))) - float64(0x1.0p52 + float64(uint32(y))) = float64(uint32(x)) - float64(uint32(y))
- m.insert(m.allocateInstr().asXmmRmR(sseOpcodeSubpd, newOperandReg(maskReg), xx))
-
- m.copyTo(xx, m.c.VRegOf(ret))
- }
- default:
- panic(fmt.Sprintf("invalid lane type: %s", lane))
- }
-}
-
-var (
- // i32sMaxOnF64x2 holds math.MaxInt32(=2147483647.0) on two f64 lanes. 
- i32sMaxOnF64x2 = [16]byte{ - 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41, // float64(2147483647.0) - 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41, // float64(2147483647.0) - } - - // i32sMaxOnF64x2 holds math.MaxUint32(=4294967295.0) on two f64 lanes. - i32uMaxOnF64x2 = [16]byte{ - 0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41, // float64(4294967295.0) - 0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41, // float64(4294967295.0) - } - - // twop52 holds two float64(0x1.0p52) on two f64 lanes. 0x1.0p52 is special in the sense that - // with this exponent, the mantissa represents a corresponding uint32 number, and arithmetics, - // like addition or subtraction, the resulted floating point holds exactly the same - // bit representations in 32-bit integer on its mantissa. - // - // Note: the name twop52 is common across various compiler ecosystem. - // E.g. https://github.com/llvm/llvm-project/blob/92ab024f81e5b64e258b7c3baaf213c7c26fcf40/compiler-rt/lib/builtins/floatdidf.c#L28 - // E.g. https://opensource.apple.com/source/clang/clang-425.0.24/src/projects/compiler-rt/lib/floatdidf.c.auto.html - twop52 = [16]byte{ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, // float64(0x1.0p52) - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, // float64(0x1.0p52) - } -) - -func (m *machine) lowerVFcvtToIntSat(x, ret ssa.Value, lane ssa.VecLane, signed bool) { - _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) - xx := m.copyToTmp(_xx.reg()) - - switch lane { - case ssa.VecLaneF32x4: - if signed { - tmp := m.copyToTmp(xx) - - // Assuming we have xx = [v1, v2, v3, v4]. - // - // Set all bits if lane is not NaN on tmp. - // tmp[i] = 0xffffffff if vi != NaN - // = 0 if vi == NaN - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmpps, uint8(cmpPredEQ_OQ), newOperandReg(tmp), tmp)) - - // Clear NaN lanes on xx, meaning that - // xx[i] = vi if vi != NaN - // 0 if vi == NaN - m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(tmp), xx)) - - // tmp[i] = ^vi if vi != NaN - // = 0xffffffff if vi == NaN - // which means that tmp[i] & 0x80000000 != 0 if and only if vi is negative. - m.insert(m.allocateInstr().asXmmRmR(sseOpcodeXorps, newOperandReg(xx), tmp)) - - // xx[i] = int32(vi) if vi != NaN and xx is not overflowing. - // = 0x80000000 if vi != NaN and xx is overflowing (See https://www.felixcloutier.com/x86/cvttps2dq) - // = 0 if vi == NaN - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(xx), xx)) - - // Below, we have to convert 0x80000000 into 0x7FFFFFFF for positive overflowing lane. - // - // tmp[i] = 0x80000000 if vi is positive - // = any satisfying any&0x80000000 = 0 if vi is negative or zero. - m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(xx), tmp)) - - // Arithmetic right shifting tmp by 31, meaning that we have - // tmp[i] = 0xffffffff if vi is positive, 0 otherwise. - m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrad, newOperandImm32(0x1f), tmp)) - - // Flipping 0x80000000 if vi is positive, otherwise keep intact. 
- m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), xx)) - } else { - tmp := m.c.AllocateVReg(ssa.TypeV128) - m.insert(m.allocateInstr().asZeros(tmp)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMaxps, newOperandReg(tmp), xx)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmp), tmp)) - m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(0x1), tmp)) - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp), tmp)) - tmp2 := m.copyToTmp(xx) - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(xx), xx)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodeSubps, newOperandReg(tmp), tmp2)) - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmpps, uint8(cmpPredLE_OS), newOperandReg(tmp2), tmp)) - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(tmp2), tmp2)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp2)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaxsd, newOperandReg(tmp), tmp2)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddd, newOperandReg(tmp2), xx)) - } - - case ssa.VecLaneF64x2: - tmp2 := m.c.AllocateVReg(ssa.TypeV128) - if signed { - tmp := m.copyToTmp(xx) - - // Set all bits for non-NaN lanes, zeros otherwise. - // I.e. tmp[i] = 0xffffffff_ffffffff if vi != NaN, 0 otherwise. - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmppd, uint8(cmpPredEQ_OQ), newOperandReg(tmp), tmp)) - - maskLabel := m.getOrAllocateConstLabel(&m.constI32sMaxOnF64x2Index, i32sMaxOnF64x2[:]) - // Load the 2147483647 into tmp2's each lane. - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), tmp2)) - - // tmp[i] = 2147483647 if vi != NaN, 0 otherwise. - m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(tmp2), tmp)) - - // MINPD returns the source register's value as-is, so we have - // xx[i] = vi if vi != NaN - // = 0 if vi == NaN - m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMinpd, newOperandReg(tmp), xx)) - - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttpd2dq, newOperandReg(xx), xx)) - } else { - tmp := m.c.AllocateVReg(ssa.TypeV128) - m.insert(m.allocateInstr().asZeros(tmp)) - - // xx[i] = vi if vi != NaN && vi > 0 - // = 0 if vi == NaN || vi <= 0 - m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMaxpd, newOperandReg(tmp), xx)) - - // tmp2[i] = float64(math.MaxUint32) = math.MaxUint32 - maskIndex := m.getOrAllocateConstLabel(&m.constI32uMaxOnF64x2Index, i32uMaxOnF64x2[:]) - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskIndex)), tmp2)) - - // xx[i] = vi if vi != NaN && vi > 0 && vi <= math.MaxUint32 - // = 0 otherwise - m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMinpd, newOperandReg(tmp2), xx)) - - // Round the floating points into integer. - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeRoundpd, 0x3, newOperandReg(xx), xx)) - - // tmp2[i] = float64(0x1.0p52) - maskIndex = m.getOrAllocateConstLabel(&m.constTwop52Index, twop52[:]) - m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskIndex)), tmp2)) - - // xx[i] = float64(0x1.0p52) + float64(uint32(vi)) if vi != NaN && vi > 0 && vi <= math.MaxUint32 - // = 0 otherwise - // - // This means that xx[i] holds exactly the same bit of uint32(vi) in its lower 32-bits. 
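A standalone sketch of the 0x1.0p52 property the comment above relies on, in plain Go (illustrative only): for any uint32 x, the sum 0x1.0p52 + float64(x) is exactly representable, and its low 32 bits are x verbatim.

package main

import (
	"fmt"
	"math"
)

func main() {
	for _, x := range []uint32{0, 1, 42, math.MaxUint32} {
		biased := 0x1p52 + float64(x) // exact: the sum is an integer below 2^53
		// The low 32 bits of the biased double's bit pattern hold x verbatim.
		fmt.Println(uint32(math.Float64bits(biased)) == x) // always true
	}
}

Subtracting 0x1.0p52 recovers float64(x) exactly, while keeping just the low 32-bit halves (as the SHUFPS below does) recovers the integer bits themselves.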
- m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddpd, newOperandReg(tmp2), xx)) - - // At this point, we have - // xx = [uint32(v0), float64(0x1.0p52), uint32(v1), float64(0x1.0p52)] - // tmp = [0, 0, 0, 0] - // as 32x4 lanes. Therefore, SHUFPS with 0b00_00_10_00 results in - // xx = [xx[00], xx[10], tmp[00], tmp[00]] = [xx[00], xx[10], 0, 0] - // meaning that for i = 0 and 1, we have - // xx[i] = uint32(vi) if vi != NaN && vi > 0 && vi <= math.MaxUint32 - // = 0 otherwise. - m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeShufps, 0b00_00_10_00, newOperandReg(tmp), xx)) - } - default: - panic(fmt.Sprintf("invalid lane type: %s", lane)) - } - - m.copyTo(xx, m.c.VRegOf(ret)) -} - -func (m *machine) lowerNarrow(x, y, ret ssa.Value, lane ssa.VecLane, signed bool) { - _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) - xx := m.copyToTmp(_xx.reg()) - yy := m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) - - var sseOp sseOpcode - switch lane { - case ssa.VecLaneI16x8: - if signed { - sseOp = sseOpcodePacksswb - } else { - sseOp = sseOpcodePackuswb - } - case ssa.VecLaneI32x4: - if signed { - sseOp = sseOpcodePackssdw - } else { - sseOp = sseOpcodePackusdw - } - default: - panic(fmt.Sprintf("invalid lane type: %s", lane)) - } - m.insert(m.allocateInstr().asXmmRmR(sseOp, yy, xx)) - m.copyTo(xx, m.c.VRegOf(ret)) -} - -func (m *machine) lowerWideningPairwiseDotProductS(x, y, ret ssa.Value) { - _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) - xx := m.copyToTmp(_xx.reg()) - yy := m.getOperand_Mem_Reg(m.c.ValueDefinition(y)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, yy, xx)) - m.copyTo(xx, m.c.VRegOf(ret)) -} - -func (m *machine) lowerVIabs(instr *ssa.Instruction) { - x, lane := instr.ArgWithLane() - rd := m.c.VRegOf(instr.Return()) - - if lane == ssa.VecLaneI64x2 { - _xx := m.getOperand_Reg(m.c.ValueDefinition(x)) - - blendReg := xmm0VReg - m.insert(m.allocateInstr().asDefineUninitializedReg(blendReg)) - - tmp := m.copyToTmp(_xx.reg()) - xx := m.copyToTmp(_xx.reg()) - - // Clear all bits on blendReg. - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(blendReg), blendReg)) - // Subtract xx from blendMaskReg. - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePsubq, newOperandReg(xx), blendReg)) - // Copy the subtracted value ^^ back into tmp. - m.copyTo(blendReg, xx) - - m.insert(m.allocateInstr().asBlendvpd(newOperandReg(tmp), xx)) - - m.copyTo(xx, rd) - } else { - var vecOp sseOpcode - switch lane { - case ssa.VecLaneI8x16: - vecOp = sseOpcodePabsb - case ssa.VecLaneI16x8: - vecOp = sseOpcodePabsw - case ssa.VecLaneI32x4: - vecOp = sseOpcodePabsd - } - rn := m.getOperand_Reg(m.c.ValueDefinition(x)) - - i := m.allocateInstr() - i.asXmmUnaryRmR(vecOp, rn, rd) - m.insert(i) - } -} - -func (m *machine) lowerVIpopcnt(instr *ssa.Instruction) { - x := instr.Arg() - rn := m.getOperand_Reg(m.c.ValueDefinition(x)) - rd := m.c.VRegOf(instr.Return()) - - tmp1 := m.c.AllocateVReg(ssa.TypeV128) - m.lowerVconst(tmp1, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f) - - // Copy input into tmp2. - tmp2 := m.copyToTmp(rn.reg()) - - // Given that we have: - // rm = [b1, ..., b16] where bn = hn:ln and hn and ln are higher and lower 4-bits of bn. - // - // Take PAND on tmp1 and tmp2, so that we mask out all the higher bits. - // tmp2 = [l1, ..., l16]. - pand := m.allocateInstr() - pand.asXmmRmR(sseOpcodePand, newOperandReg(tmp1), tmp2) - m.insert(pand) - - // Do logical (packed word) right shift by 4 on rm and PAND against the mask (tmp1); meaning that we have - // tmp3 = [h1, ...., h16]. 
- tmp3 := m.copyToTmp(rn.reg()) - psrlw := m.allocateInstr() - psrlw.asXmmRmiReg(sseOpcodePsrlw, newOperandImm32(4), tmp3) - m.insert(psrlw) - - pand2 := m.allocateInstr() - pand2.asXmmRmR(sseOpcodePand, newOperandReg(tmp1), tmp3) - m.insert(pand2) - - // Read the popcntTable into tmp4, and we have - // tmp4 = [0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04] - tmp4 := m.c.AllocateVReg(ssa.TypeV128) - m.lowerVconst(tmp4, 0x03_02_02_01_02_01_01_00, 0x04_03_03_02_03_02_02_01) - - // Make a copy for later. - tmp5 := m.copyToTmp(tmp4) - - // tmp4 = [popcnt(l1), ..., popcnt(l16)]. - pshufb := m.allocateInstr() - pshufb.asXmmRmR(sseOpcodePshufb, newOperandReg(tmp2), tmp4) - m.insert(pshufb) - - pshufb2 := m.allocateInstr() - pshufb2.asXmmRmR(sseOpcodePshufb, newOperandReg(tmp3), tmp5) - m.insert(pshufb2) - - // tmp4 + tmp5 is the result. - paddb := m.allocateInstr() - paddb.asXmmRmR(sseOpcodePaddb, newOperandReg(tmp4), tmp5) - m.insert(paddb) - - m.copyTo(tmp5, rd) -} - -func (m *machine) lowerVImul(instr *ssa.Instruction) { - x, y, lane := instr.Arg2WithLane() - rd := m.c.VRegOf(instr.Return()) - if lane == ssa.VecLaneI64x2 { - rn := m.getOperand_Reg(m.c.ValueDefinition(x)) - rm := m.getOperand_Reg(m.c.ValueDefinition(y)) - // Assuming that we have - // rm = [p1, p2] = [p1_lo, p1_hi, p2_lo, p2_high] - // rn = [q1, q2] = [q1_lo, q1_hi, q2_lo, q2_high] - // where pN and qN are 64-bit (quad word) lane, and pN_lo, pN_hi, qN_lo and qN_hi are 32-bit (double word) lane. - - // Copy rn into tmp1. - tmp1 := m.copyToTmp(rn.reg()) - - // And do the logical right shift by 32-bit on tmp1, which makes tmp1 = [0, p1_high, 0, p2_high] - shift := m.allocateInstr() - shift.asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(32), tmp1) - m.insert(shift) - - // Execute "pmuludq rm,tmp1", which makes tmp1 = [p1_high*q1_lo, p2_high*q2_lo] where each lane is 64-bit. - mul := m.allocateInstr() - mul.asXmmRmR(sseOpcodePmuludq, rm, tmp1) - m.insert(mul) - - // Copy rm value into tmp2. - tmp2 := m.copyToTmp(rm.reg()) - - // And do the logical right shift by 32-bit on tmp2, which makes tmp2 = [0, q1_high, 0, q2_high] - shift2 := m.allocateInstr() - shift2.asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(32), tmp2) - m.insert(shift2) - - // Execute "pmuludq rm,tmp2", which makes tmp2 = [p1_lo*q1_high, p2_lo*q2_high] where each lane is 64-bit. - mul2 := m.allocateInstr() - mul2.asXmmRmR(sseOpcodePmuludq, rn, tmp2) - m.insert(mul2) - - // Adds tmp1 and tmp2 and do the logical left shift by 32-bit, - // which makes tmp1 = [(p1_lo*q1_high+p1_high*q1_lo)<<32, (p2_lo*q2_high+p2_high*q2_lo)<<32] - add := m.allocateInstr() - add.asXmmRmR(sseOpcodePaddq, newOperandReg(tmp2), tmp1) - m.insert(add) - - shift3 := m.allocateInstr() - shift3.asXmmRmiReg(sseOpcodePsllq, newOperandImm32(32), tmp1) - m.insert(shift3) - - // Copy rm value into tmp3. - tmp3 := m.copyToTmp(rm.reg()) - - // "pmuludq rm,tmp3" makes tmp3 = [p1_lo*q1_lo, p2_lo*q2_lo] where each lane is 64-bit. 
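The same decomposition the comments above describe, as a scalar sketch in plain Go (illustrative only; the helper name is made up): modulo 2^64, p*q equals p_lo*q_lo plus the two cross products shifted left by 32, and the p_hi*q_hi term vanishes entirely.

// mul64ViaDwords multiplies two 64-bit values using only 32x32-bit products,
// mirroring the PMULUDQ/PADDQ/PSLLQ sequence above, one lane at a time.
func mul64ViaDwords(p, q uint64) uint64 {
	pLo, pHi := p&0xffffffff, p>>32
	qLo, qHi := q&0xffffffff, q>>32
	cross := (pHi*qLo + pLo*qHi) << 32 // tmp1: the shifted cross terms
	return pLo*qLo + cross             // tmp3 + tmp1; p_hi*q_hi*2^64 == 0 (mod 2^64)
}

Under Go's wrapping uint64 arithmetic this equals p*q for all inputs, which is why the hi*hi product never has to be computed.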
- mul3 := m.allocateInstr() - mul3.asXmmRmR(sseOpcodePmuludq, rn, tmp3) - m.insert(mul3) - - // Finally, we get the result by computing tmp1 + tmp3, - // which makes tmp1 = [(p1_lo*q1_high+p1_high*q1_lo)<<32+p1_lo*q1_lo, (p2_lo*q2_high+p2_high*q2_lo)<<32+p2_lo*q2_lo] - add2 := m.allocateInstr() - add2.asXmmRmR(sseOpcodePaddq, newOperandReg(tmp3), tmp1) - m.insert(add2) - - m.copyTo(tmp1, rd) - - } else { - var vecOp sseOpcode - switch lane { - case ssa.VecLaneI16x8: - vecOp = sseOpcodePmullw - case ssa.VecLaneI32x4: - vecOp = sseOpcodePmulld - default: - panic("unsupported: " + lane.String()) - } - m.lowerVbBinOp(vecOp, x, y, instr.Return()) - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go deleted file mode 100644 index 787975683..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go +++ /dev/null @@ -1,336 +0,0 @@ -package amd64 - -import ( - "fmt" - "unsafe" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -type operand struct { - kind operandKind - data uint64 -} - -type operandKind byte - -const ( - // operandKindReg is an operand which is an integer Register. - operandKindReg operandKind = iota + 1 - - // operandKindMem is a value in Memory. - // 32, 64, or 128 bit value. - operandKindMem - - // operandKindImm32 is a signed-32-bit integer immediate value. - operandKindImm32 - - // operandKindLabel is a label. - operandKindLabel -) - -// String implements fmt.Stringer. -func (o operandKind) String() string { - switch o { - case operandKindReg: - return "reg" - case operandKindMem: - return "mem" - case operandKindImm32: - return "imm32" - case operandKindLabel: - return "label" - default: - panic("BUG: invalid operand kind") - } -} - -// format returns the string representation of the operand. -// _64 is only for the case where the operand is a register, and it's integer. 
-func (o *operand) format(_64 bool) string {
- switch o.kind {
- case operandKindReg:
- return formatVRegSized(o.reg(), _64)
- case operandKindMem:
- return o.addressMode().String()
- case operandKindImm32:
- return fmt.Sprintf("$%d", int32(o.imm32()))
- case operandKindLabel:
- return label(o.imm32()).String()
- default:
- panic(fmt.Sprintf("BUG: invalid operand: %s", o.kind))
- }
-}
-
-//go:inline
-func (o *operand) reg() regalloc.VReg {
- return regalloc.VReg(o.data)
-}
-
-//go:inline
-func (o *operand) setReg(r regalloc.VReg) {
- o.data = uint64(r)
-}
-
-//go:inline
-func (o *operand) addressMode() *amode {
- return wazevoapi.PtrFromUintptr[amode](uintptr(o.data))
-}
-
-//go:inline
-func (o *operand) imm32() uint32 {
- return uint32(o.data)
-}
-
-func (o *operand) label() label {
- switch o.kind {
- case operandKindLabel:
- return label(o.data)
- case operandKindMem:
- mem := o.addressMode()
- if mem.kind() != amodeRipRel {
- panic("BUG: invalid label")
- }
- return label(mem.imm32)
- default:
- panic("BUG: invalid operand kind")
- }
-}
-
-func newOperandLabel(label label) operand {
- return operand{kind: operandKindLabel, data: uint64(label)}
-}
-
-func newOperandReg(r regalloc.VReg) operand {
- return operand{kind: operandKindReg, data: uint64(r)}
-}
-
-func newOperandImm32(imm32 uint32) operand {
- return operand{kind: operandKindImm32, data: uint64(imm32)}
-}
-
-func newOperandMem(amode *amode) operand {
- return operand{kind: operandKindMem, data: uint64(uintptr(unsafe.Pointer(amode)))}
-}
-
-// amode is a memory operand (addressing mode).
-type amode struct {
- kindWithShift uint32
- imm32 uint32
- base regalloc.VReg
-
- // For amodeRegRegShift:
- index regalloc.VReg
-}
-
-type amodeKind byte
-
-const (
- // amodeImmReg calculates sign-extend-32-to-64(Immediate) + base
- amodeImmReg amodeKind = iota + 1
-
- // amodeImmRBP is the same as amodeImmReg, but the base register is fixed to RBP.
- // The only difference is that it doesn't tell the register allocator to use RBP, which is distracting for the
- // register allocator.
- amodeImmRBP
-
- // amodeRegRegShift calculates sign-extend-32-to-64(Immediate) + base + (Register2 << Shift)
- amodeRegRegShift
-
- // amodeRipRel is a RIP-relative addressing mode specified by the label.
- amodeRipRel
-
- // TODO: there are other addressing modes such as the one without base register. 
-) - -func (a *amode) kind() amodeKind { - return amodeKind(a.kindWithShift & 0xff) -} - -func (a *amode) shift() byte { - return byte(a.kindWithShift >> 8) -} - -func (a *amode) uses(rs *[]regalloc.VReg) { - switch a.kind() { - case amodeImmReg: - *rs = append(*rs, a.base) - case amodeRegRegShift: - *rs = append(*rs, a.base, a.index) - case amodeImmRBP, amodeRipRel: - default: - panic("BUG: invalid amode kind") - } -} - -func (a *amode) nregs() int { - switch a.kind() { - case amodeImmReg: - return 1 - case amodeRegRegShift: - return 2 - case amodeImmRBP, amodeRipRel: - return 0 - default: - panic("BUG: invalid amode kind") - } -} - -func (a *amode) assignUses(i int, reg regalloc.VReg) { - switch a.kind() { - case amodeImmReg: - if i == 0 { - a.base = reg - } else { - panic("BUG: invalid amode assignment") - } - case amodeRegRegShift: - if i == 0 { - a.base = reg - } else if i == 1 { - a.index = reg - } else { - panic("BUG: invalid amode assignment") - } - default: - panic("BUG: invalid amode assignment") - } -} - -func (m *machine) newAmodeImmReg(imm32 uint32, base regalloc.VReg) *amode { - ret := m.amodePool.Allocate() - *ret = amode{kindWithShift: uint32(amodeImmReg), imm32: imm32, base: base} - return ret -} - -func (m *machine) newAmodeImmRBPReg(imm32 uint32) *amode { - ret := m.amodePool.Allocate() - *ret = amode{kindWithShift: uint32(amodeImmRBP), imm32: imm32, base: rbpVReg} - return ret -} - -func (m *machine) newAmodeRegRegShift(imm32 uint32, base, index regalloc.VReg, shift byte) *amode { - if shift > 3 { - panic(fmt.Sprintf("BUG: invalid shift (must be 3>=): %d", shift)) - } - ret := m.amodePool.Allocate() - *ret = amode{kindWithShift: uint32(amodeRegRegShift) | uint32(shift)<<8, imm32: imm32, base: base, index: index} - return ret -} - -func (m *machine) newAmodeRipRel(label label) *amode { - ret := m.amodePool.Allocate() - *ret = amode{kindWithShift: uint32(amodeRipRel), imm32: uint32(label)} - return ret -} - -// String implements fmt.Stringer. -func (a *amode) String() string { - switch a.kind() { - case amodeImmReg, amodeImmRBP: - if a.imm32 == 0 { - return fmt.Sprintf("(%s)", formatVRegSized(a.base, true)) - } - return fmt.Sprintf("%d(%s)", int32(a.imm32), formatVRegSized(a.base, true)) - case amodeRegRegShift: - shift := 1 << a.shift() - if a.imm32 == 0 { - return fmt.Sprintf( - "(%s,%s,%d)", - formatVRegSized(a.base, true), formatVRegSized(a.index, true), shift) - } - return fmt.Sprintf( - "%d(%s,%s,%d)", - int32(a.imm32), formatVRegSized(a.base, true), formatVRegSized(a.index, true), shift) - case amodeRipRel: - return fmt.Sprintf("%s(%%rip)", label(a.imm32)) - default: - panic("BUG: invalid amode kind") - } -} - -func (m *machine) getOperand_Mem_Reg(def backend.SSAValueDefinition) (op operand) { - if !def.IsFromInstr() { - return newOperandReg(m.c.VRegOf(def.V)) - } - - if def.V.Type() == ssa.TypeV128 { - // SIMD instructions require strict memory alignment, so we don't support the memory operand for V128 at the moment. 
- return m.getOperand_Reg(def) - } - - if m.c.MatchInstr(def, ssa.OpcodeLoad) { - instr := def.Instr - ptr, offset, _ := instr.LoadData() - op = newOperandMem(m.lowerToAddressMode(ptr, offset)) - instr.MarkLowered() - return op - } - return m.getOperand_Reg(def) -} - -func (m *machine) getOperand_Mem_Imm32_Reg(def backend.SSAValueDefinition) (op operand) { - if !def.IsFromInstr() { - return newOperandReg(m.c.VRegOf(def.V)) - } - - if m.c.MatchInstr(def, ssa.OpcodeLoad) { - instr := def.Instr - ptr, offset, _ := instr.LoadData() - op = newOperandMem(m.lowerToAddressMode(ptr, offset)) - instr.MarkLowered() - return op - } - return m.getOperand_Imm32_Reg(def) -} - -func (m *machine) getOperand_Imm32_Reg(def backend.SSAValueDefinition) (op operand) { - if !def.IsFromInstr() { - return newOperandReg(m.c.VRegOf(def.V)) - } - - instr := def.Instr - if instr.Constant() { - // If the operation is 64-bit, x64 sign-extends the 32-bit immediate value. - // Therefore, we need to check if the immediate value is within the 32-bit range and if the sign bit is set, - // we should not use the immediate value. - if op, ok := asImm32Operand(instr.ConstantVal(), instr.Return().Type() == ssa.TypeI32); ok { - instr.MarkLowered() - return op - } - } - return m.getOperand_Reg(def) -} - -func asImm32Operand(val uint64, allowSignExt bool) (operand, bool) { - if imm32, ok := asImm32(val, allowSignExt); ok { - return newOperandImm32(imm32), true - } - return operand{}, false -} - -func asImm32(val uint64, allowSignExt bool) (uint32, bool) { - u32val := uint32(val) - if uint64(u32val) != val { - return 0, false - } - if !allowSignExt && u32val&0x80000000 != 0 { - return 0, false - } - return u32val, true -} - -func (m *machine) getOperand_Reg(def backend.SSAValueDefinition) (op operand) { - var v regalloc.VReg - if instr := def.Instr; instr != nil && instr.Constant() { - // We inline all the constant instructions so that we could reduce the register usage. - v = m.lowerConstant(instr) - instr.MarkLowered() - } else { - v = m.c.VRegOf(def.V) - } - return newOperandReg(v) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reg.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reg.go deleted file mode 100644 index 4aec856fa..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reg.go +++ /dev/null @@ -1,181 +0,0 @@ -package amd64 - -import ( - "fmt" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" -) - -// Amd64-specific registers. -const ( - // rax is a gp register. - rax = regalloc.RealRegInvalid + 1 + iota - // rcx is a gp register. - rcx - // rdx is a gp register. - rdx - // rbx is a gp register. - rbx - // rsp is a gp register. - rsp - // rbp is a gp register. - rbp - // rsi is a gp register. - rsi - // rdi is a gp register. - rdi - // r8 is a gp register. - r8 - // r9 is a gp register. - r9 - // r10 is a gp register. - r10 - // r11 is a gp register. - r11 - // r12 is a gp register. - r12 - // r13 is a gp register. - r13 - // r14 is a gp register. - r14 - // r15 is a gp register. - r15 - - // xmm0 is a vector register. - xmm0 - // xmm1 is a vector register. - xmm1 - // xmm2 is a vector register. - xmm2 - // xmm3 is a vector register. - xmm3 - // xmm4 is a vector register. - xmm4 - // xmm5 is a vector register. - xmm5 - // xmm6 is a vector register. - xmm6 - // xmm7 is a vector register. - xmm7 - // xmm8 is a vector register. - xmm8 - // xmm9 is a vector register. 
- xmm9 - // xmm10 is a vector register. - xmm10 - // xmm11 is a vector register. - xmm11 - // xmm12 is a vector register. - xmm12 - // xmm13 is a vector register. - xmm13 - // xmm14 is a vector register. - xmm14 - // xmm15 is a vector register. - xmm15 -) - -var ( - raxVReg = regalloc.FromRealReg(rax, regalloc.RegTypeInt) - rcxVReg = regalloc.FromRealReg(rcx, regalloc.RegTypeInt) - rdxVReg = regalloc.FromRealReg(rdx, regalloc.RegTypeInt) - rbxVReg = regalloc.FromRealReg(rbx, regalloc.RegTypeInt) - rspVReg = regalloc.FromRealReg(rsp, regalloc.RegTypeInt) - rbpVReg = regalloc.FromRealReg(rbp, regalloc.RegTypeInt) - rsiVReg = regalloc.FromRealReg(rsi, regalloc.RegTypeInt) - rdiVReg = regalloc.FromRealReg(rdi, regalloc.RegTypeInt) - r8VReg = regalloc.FromRealReg(r8, regalloc.RegTypeInt) - r9VReg = regalloc.FromRealReg(r9, regalloc.RegTypeInt) - r10VReg = regalloc.FromRealReg(r10, regalloc.RegTypeInt) - r11VReg = regalloc.FromRealReg(r11, regalloc.RegTypeInt) - r12VReg = regalloc.FromRealReg(r12, regalloc.RegTypeInt) - r13VReg = regalloc.FromRealReg(r13, regalloc.RegTypeInt) - r14VReg = regalloc.FromRealReg(r14, regalloc.RegTypeInt) - r15VReg = regalloc.FromRealReg(r15, regalloc.RegTypeInt) - - xmm0VReg = regalloc.FromRealReg(xmm0, regalloc.RegTypeFloat) - xmm1VReg = regalloc.FromRealReg(xmm1, regalloc.RegTypeFloat) - xmm2VReg = regalloc.FromRealReg(xmm2, regalloc.RegTypeFloat) - xmm3VReg = regalloc.FromRealReg(xmm3, regalloc.RegTypeFloat) - xmm4VReg = regalloc.FromRealReg(xmm4, regalloc.RegTypeFloat) - xmm5VReg = regalloc.FromRealReg(xmm5, regalloc.RegTypeFloat) - xmm6VReg = regalloc.FromRealReg(xmm6, regalloc.RegTypeFloat) - xmm7VReg = regalloc.FromRealReg(xmm7, regalloc.RegTypeFloat) - xmm8VReg = regalloc.FromRealReg(xmm8, regalloc.RegTypeFloat) - xmm9VReg = regalloc.FromRealReg(xmm9, regalloc.RegTypeFloat) - xmm10VReg = regalloc.FromRealReg(xmm10, regalloc.RegTypeFloat) - xmm11VReg = regalloc.FromRealReg(xmm11, regalloc.RegTypeFloat) - xmm12VReg = regalloc.FromRealReg(xmm12, regalloc.RegTypeFloat) - xmm13VReg = regalloc.FromRealReg(xmm13, regalloc.RegTypeFloat) - xmm14VReg = regalloc.FromRealReg(xmm14, regalloc.RegTypeFloat) - xmm15VReg = regalloc.FromRealReg(xmm15, regalloc.RegTypeFloat) -) - -var regNames = [...]string{ - rax: "rax", - rcx: "rcx", - rdx: "rdx", - rbx: "rbx", - rsp: "rsp", - rbp: "rbp", - rsi: "rsi", - rdi: "rdi", - r8: "r8", - r9: "r9", - r10: "r10", - r11: "r11", - r12: "r12", - r13: "r13", - r14: "r14", - r15: "r15", - xmm0: "xmm0", - xmm1: "xmm1", - xmm2: "xmm2", - xmm3: "xmm3", - xmm4: "xmm4", - xmm5: "xmm5", - xmm6: "xmm6", - xmm7: "xmm7", - xmm8: "xmm8", - xmm9: "xmm9", - xmm10: "xmm10", - xmm11: "xmm11", - xmm12: "xmm12", - xmm13: "xmm13", - xmm14: "xmm14", - xmm15: "xmm15", -} - -func formatVRegSized(r regalloc.VReg, _64 bool) string { - if r.IsRealReg() { - if r.RegType() == regalloc.RegTypeInt { - rr := r.RealReg() - orig := regNames[rr] - if rr <= rdi { - if _64 { - return "%" + orig - } else { - return "%e" + orig[1:] - } - } else { - if _64 { - return "%" + orig - } else { - return "%" + orig + "d" - } - } - } else { - return "%" + regNames[r.RealReg()] - } - } else { - if r.RegType() == regalloc.RegTypeInt { - if _64 { - return fmt.Sprintf("%%r%d?", r.ID()) - } else { - return fmt.Sprintf("%%r%dd?", r.ID()) - } - } else { - return fmt.Sprintf("%%xmm%d?", r.ID()) - } - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go 
deleted file mode 100644 index ef823bdbd..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go +++ /dev/null @@ -1,130 +0,0 @@ -package amd64 - -import ( - "encoding/binary" - "reflect" - "unsafe" - - "github.com/tetratelabs/wazero/internal/wasmdebug" -) - -func stackView(rbp, top uintptr) []byte { - l := int(top - rbp) - var stackBuf []byte - { - //nolint:staticcheck - hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf)) - hdr.Data = rbp - hdr.Len = l - hdr.Cap = l - } - return stackBuf -} - -// UnwindStack implements wazevo.unwindStack. -func UnwindStack(_, rbp, top uintptr, returnAddresses []uintptr) []uintptr { - stackBuf := stackView(rbp, top) - - for i := uint64(0); i < uint64(len(stackBuf)); { - // (high address) - // +-----------------+ - // | ....... | - // | ret Y | - // | ....... | - // | ret 0 | - // | arg X | - // | ....... | - // | arg 1 | - // | arg 0 | - // | ReturnAddress | - // | Caller_RBP | - // +-----------------+ <---- Caller_RBP - // | ........... | - // | clobbered M | - // | ............ | - // | clobbered 0 | - // | spill slot N | - // | ............ | - // | spill slot 0 | - // | ReturnAddress | - // | Caller_RBP | - // +-----------------+ <---- RBP - // (low address) - - callerRBP := binary.LittleEndian.Uint64(stackBuf[i:]) - retAddr := binary.LittleEndian.Uint64(stackBuf[i+8:]) - returnAddresses = append(returnAddresses, uintptr(retAddr)) - i = callerRBP - uint64(rbp) - if len(returnAddresses) == wasmdebug.MaxFrames { - break - } - } - return returnAddresses -} - -// GoCallStackView implements wazevo.goCallStackView. -func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 { - // (high address) - // +-----------------+ <----+ - // | xxxxxxxxxxx | | ;; optional unused space to make it 16-byte aligned. - // ^ | arg[N]/ret[M] | | - // sliceSize | | ............ | | SizeInBytes/8 - // | | arg[1]/ret[1] | | - // v | arg[0]/ret[0] | <----+ - // | SizeInBytes | - // +-----------------+ <---- stackPointerBeforeGoCall - // (low address) - data := unsafe.Add(unsafe.Pointer(stackPointerBeforeGoCall), 8) - size := *stackPointerBeforeGoCall / 8 - return unsafe.Slice((*uint64)(data), size) -} - -func AdjustClonedStack(oldRsp, oldTop, rsp, rbp, top uintptr) { - diff := uint64(rsp - oldRsp) - - newBuf := stackView(rbp, top) - for i := uint64(0); i < uint64(len(newBuf)); { - // (high address) - // +-----------------+ - // | ....... | - // | ret Y | - // | ....... | - // | ret 0 | - // | arg X | - // | ....... | - // | arg 1 | - // | arg 0 | - // | ReturnAddress | - // | Caller_RBP | - // +-----------------+ <---- Caller_RBP - // | ........... | - // | clobbered M | - // | ............ | - // | clobbered 0 | - // | spill slot N | - // | ............ | - // | spill slot 0 | - // | ReturnAddress | - // | Caller_RBP | - // +-----------------+ <---- RBP - // (low address) - - callerRBP := binary.LittleEndian.Uint64(newBuf[i:]) - if callerRBP == 0 { - // End of stack. 
- break - } - if i64 := int64(callerRBP); i64 < int64(oldRsp) || i64 >= int64(oldTop) { - panic("BUG: callerRBP is out of range") - } - if int(callerRBP) < 0 { - panic("BUG: callerRBP is negative") - } - adjustedCallerRBP := callerRBP + diff - if int(adjustedCallerRBP) < 0 { - panic("BUG: adjustedCallerRBP is negative") - } - binary.LittleEndian.PutUint64(newBuf[i:], adjustedCallerRBP) - i = adjustedCallerRBP - uint64(rbp) - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go deleted file mode 100644 index d1eaa7cd4..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go +++ /dev/null @@ -1,333 +0,0 @@ -package arm64 - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -// References: -// * https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#arm64-architecture -// * https://developer.arm.com/documentation/102374/0101/Procedure-Call-Standard - -var ( - intParamResultRegs = []regalloc.RealReg{x0, x1, x2, x3, x4, x5, x6, x7} - floatParamResultRegs = []regalloc.RealReg{v0, v1, v2, v3, v4, v5, v6, v7} -) - -var regInfo = ®alloc.RegisterInfo{ - AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{ - // We don't allocate: - // - x18: Reserved by the macOS: https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Respect-the-purpose-of-specific-CPU-registers - // - x28: Reserved by Go runtime. - // - x27(=tmpReg): because of the reason described on tmpReg. - regalloc.RegTypeInt: { - x8, x9, x10, x11, x12, x13, x14, x15, - x16, x17, x19, x20, x21, x22, x23, x24, x25, - x26, x29, x30, - // These are the argument/return registers. Less preferred in the allocation. - x7, x6, x5, x4, x3, x2, x1, x0, - }, - regalloc.RegTypeFloat: { - v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, - v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, - // These are the argument/return registers. Less preferred in the allocation. 
- v7, v6, v5, v4, v3, v2, v1, v0, - }, - }, - CalleeSavedRegisters: regalloc.NewRegSet( - x19, x20, x21, x22, x23, x24, x25, x26, x28, - v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, - ), - CallerSavedRegisters: regalloc.NewRegSet( - x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x29, x30, - v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, - ), - RealRegToVReg: []regalloc.VReg{ - x0: x0VReg, x1: x1VReg, x2: x2VReg, x3: x3VReg, x4: x4VReg, x5: x5VReg, x6: x6VReg, x7: x7VReg, x8: x8VReg, x9: x9VReg, x10: x10VReg, x11: x11VReg, x12: x12VReg, x13: x13VReg, x14: x14VReg, x15: x15VReg, x16: x16VReg, x17: x17VReg, x18: x18VReg, x19: x19VReg, x20: x20VReg, x21: x21VReg, x22: x22VReg, x23: x23VReg, x24: x24VReg, x25: x25VReg, x26: x26VReg, x27: x27VReg, x28: x28VReg, x29: x29VReg, x30: x30VReg, - v0: v0VReg, v1: v1VReg, v2: v2VReg, v3: v3VReg, v4: v4VReg, v5: v5VReg, v6: v6VReg, v7: v7VReg, v8: v8VReg, v9: v9VReg, v10: v10VReg, v11: v11VReg, v12: v12VReg, v13: v13VReg, v14: v14VReg, v15: v15VReg, v16: v16VReg, v17: v17VReg, v18: v18VReg, v19: v19VReg, v20: v20VReg, v21: v21VReg, v22: v22VReg, v23: v23VReg, v24: v24VReg, v25: v25VReg, v26: v26VReg, v27: v27VReg, v28: v28VReg, v29: v29VReg, v30: v30VReg, v31: v31VReg, - }, - RealRegName: func(r regalloc.RealReg) string { return regNames[r] }, - RealRegType: func(r regalloc.RealReg) regalloc.RegType { - if r < v0 { - return regalloc.RegTypeInt - } - return regalloc.RegTypeFloat - }, -} - -// ArgsResultsRegs implements backend.Machine. -func (m *machine) ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) { - return intParamResultRegs, floatParamResultRegs -} - -// LowerParams implements backend.FunctionABI. -func (m *machine) LowerParams(args []ssa.Value) { - a := m.currentABI - - for i, ssaArg := range args { - if !ssaArg.Valid() { - continue - } - reg := m.compiler.VRegOf(ssaArg) - arg := &a.Args[i] - if arg.Kind == backend.ABIArgKindReg { - m.InsertMove(reg, arg.Reg, arg.Type) - } else { - // TODO: we could use pair load if there's consecutive loads for the same type. - // - // (high address) - // +-----------------+ - // | ....... | - // | ret Y | - // | ....... | - // | ret 0 | - // | arg X | - // | ....... | - // | arg 1 | - // | arg 0 | <-| - // | ReturnAddress | | - // +-----------------+ | - // | ........... | | - // | clobbered M | | argStackOffset: is unknown at this point of compilation. - // | ............ | | - // | clobbered 0 | | - // | spill slot N | | - // | ........... | | - // | spill slot 0 | | - // SP---> +-----------------+ <-+ - // (low address) - - bits := arg.Type.Bits() - // At this point of compilation, we don't yet know how much space exist below the return address. - // So we instruct the address mode to add the `argStackOffset` to the offset at the later phase of compilation. - amode := m.amodePool.Allocate() - *amode = addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace} - load := m.allocateInstr() - switch arg.Type { - case ssa.TypeI32, ssa.TypeI64: - load.asULoad(reg, amode, bits) - case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - load.asFpuLoad(reg, amode, bits) - default: - panic("BUG") - } - m.insert(load) - m.unresolvedAddressModes = append(m.unresolvedAddressModes, load) - } - } -} - -// LowerReturns lowers the given returns. 
-func (m *machine) LowerReturns(rets []ssa.Value) { - a := m.currentABI - - l := len(rets) - 1 - for i := range rets { - // Reverse order in order to avoid overwriting the stack returns existing in the return registers. - ret := rets[l-i] - r := &a.Rets[l-i] - reg := m.compiler.VRegOf(ret) - if def := m.compiler.ValueDefinition(ret); def.IsFromInstr() { - // Constant instructions are inlined. - if inst := def.Instr; inst.Constant() { - val := inst.Return() - valType := val.Type() - v := inst.ConstantVal() - m.insertLoadConstant(v, valType, reg) - } - } - if r.Kind == backend.ABIArgKindReg { - m.InsertMove(r.Reg, reg, ret.Type()) - } else { - // TODO: we could use pair store if there's consecutive stores for the same type. - // - // (high address) - // +-----------------+ - // | ....... | - // | ret Y | - // | ....... | - // | ret 0 | <-+ - // | arg X | | - // | ....... | | - // | arg 1 | | - // | arg 0 | | - // | ReturnAddress | | - // +-----------------+ | - // | ........... | | - // | spill slot M | | retStackOffset: is unknown at this point of compilation. - // | ............ | | - // | spill slot 2 | | - // | spill slot 1 | | - // | clobbered 0 | | - // | clobbered 1 | | - // | ........... | | - // | clobbered N | | - // SP---> +-----------------+ <-+ - // (low address) - - bits := r.Type.Bits() - - // At this point of compilation, we don't yet know how much space exist below the return address. - // So we instruct the address mode to add the `retStackOffset` to the offset at the later phase of compilation. - amode := m.amodePool.Allocate() - *amode = addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace} - store := m.allocateInstr() - store.asStore(operandNR(reg), amode, bits) - m.insert(store) - m.unresolvedAddressModes = append(m.unresolvedAddressModes, store) - } - } -} - -// callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the -// caller side of the function call. -func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def backend.SSAValueDefinition, slotBegin int64) { - arg := &a.Args[argIndex] - if def.IsFromInstr() { - // Constant instructions are inlined. - if inst := def.Instr; inst.Constant() { - val := inst.Return() - valType := val.Type() - v := inst.ConstantVal() - m.insertLoadConstant(v, valType, reg) - } - } - if arg.Kind == backend.ABIArgKindReg { - m.InsertMove(arg.Reg, reg, arg.Type) - } else { - // TODO: we could use pair store if there's consecutive stores for the same type. - // - // Note that at this point, stack pointer is already adjusted. - bits := arg.Type.Bits() - amode := m.resolveAddressModeForOffset(arg.Offset-slotBegin, bits, spVReg, false) - store := m.allocateInstr() - store.asStore(operandNR(reg), amode, bits) - m.insert(store) - } -} - -func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex int, reg regalloc.VReg, slotBegin int64) { - r := &a.Rets[retIndex] - if r.Kind == backend.ABIArgKindReg { - m.InsertMove(reg, r.Reg, r.Type) - } else { - // TODO: we could use pair load if there's consecutive loads for the same type. 
- amode := m.resolveAddressModeForOffset(a.ArgStackSize+r.Offset-slotBegin, r.Type.Bits(), spVReg, false) - ldr := m.allocateInstr() - switch r.Type { - case ssa.TypeI32, ssa.TypeI64: - ldr.asULoad(reg, amode, r.Type.Bits()) - case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - ldr.asFpuLoad(reg, amode, r.Type.Bits()) - default: - panic("BUG") - } - m.insert(ldr) - } -} - -func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, *addressMode) { - m.pendingInstructions = m.pendingInstructions[:0] - mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse) - for _, instr := range m.pendingInstructions { - cur = linkInstr(cur, instr) - } - return cur, mode -} - -func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) *addressMode { - if rn.RegType() != regalloc.RegTypeInt { - panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64)) - } - amode := m.amodePool.Allocate() - if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) { - *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset} - } else if offsetFitsInAddressModeKindRegSignedImm9(offset) { - *amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset} - } else { - var indexReg regalloc.VReg - if allowTmpRegUse { - m.lowerConstantI64(tmpRegVReg, offset) - indexReg = tmpRegVReg - } else { - indexReg = m.compiler.AllocateVReg(ssa.TypeI64) - m.lowerConstantI64(indexReg, offset) - } - *amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */} - } - return amode -} - -func (m *machine) lowerCall(si *ssa.Instruction) { - isDirectCall := si.Opcode() == ssa.OpcodeCall - var indirectCalleePtr ssa.Value - var directCallee ssa.FuncRef - var sigID ssa.SignatureID - var args []ssa.Value - if isDirectCall { - directCallee, sigID, args = si.CallData() - } else { - indirectCalleePtr, sigID, args, _ /* on arm64, the calling convention is compatible with the Go runtime */ = si.CallIndirectData() - } - calleeABI := m.compiler.GetFunctionABI(m.compiler.SSABuilder().ResolveSignature(sigID)) - - stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize()) - if m.maxRequiredStackSizeForCalls < stackSlotSize+16 { - m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // return address frame. 
- } - - for i, arg := range args { - reg := m.compiler.VRegOf(arg) - def := m.compiler.ValueDefinition(arg) - m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize) - } - - if isDirectCall { - call := m.allocateInstr() - call.asCall(directCallee, calleeABI) - m.insert(call) - } else { - ptr := m.compiler.VRegOf(indirectCalleePtr) - callInd := m.allocateInstr() - callInd.asCallIndirect(ptr, calleeABI) - m.insert(callInd) - } - - var index int - r1, rs := si.Returns() - if r1.Valid() { - m.callerGenFunctionReturnVReg(calleeABI, 0, m.compiler.VRegOf(r1), stackSlotSize) - index++ - } - - for _, r := range rs { - m.callerGenFunctionReturnVReg(calleeABI, index, m.compiler.VRegOf(r), stackSlotSize) - index++ - } -} - -func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add bool) { - if imm12Operand, ok := asImm12Operand(uint64(diff)); ok { - alu := m.allocateInstr() - var ao aluOp - if add { - ao = aluOpAdd - } else { - ao = aluOpSub - } - alu.asALU(ao, rd, operandNR(spVReg), imm12Operand, true) - m.insert(alu) - } else { - m.lowerConstantI64(tmpRegVReg, diff) - alu := m.allocateInstr() - var ao aluOp - if add { - ao = aluOpAdd - } else { - ao = aluOpSub - } - alu.asALU(ao, rd, operandNR(spVReg), operandNR(tmpRegVReg), true) - m.insert(alu) - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_arm64.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_arm64.go deleted file mode 100644 index 5f0c613df..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_arm64.go +++ /dev/null @@ -1,9 +0,0 @@ -package arm64 - -// entrypoint enters the machine code generated by this backend which begins with the preamble generated by functionABI.EmitGoEntryPreamble below. -// This implements wazevo.entrypoint, and see the comments there for detail. -func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr) - -// afterGoFunctionCallEntrypoint enters the machine code after growing the stack. -// This implements wazevo.afterGoFunctionCallEntrypoint, and see the comments there for detail. -func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_arm64.s b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_arm64.s deleted file mode 100644 index 0b579f852..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_arm64.s +++ /dev/null @@ -1,29 +0,0 @@ -//go:build arm64 - -#include "funcdata.h" -#include "textflag.h" - -// See the comments on EmitGoEntryPreamble for what this function is supposed to do. -TEXT ·entrypoint(SB), NOSPLIT|NOFRAME, $0-48 - MOVD preambleExecutable+0(FP), R27 - MOVD functionExectuable+8(FP), R24 - MOVD executionContextPtr+16(FP), R0 - MOVD moduleContextPtr+24(FP), R1 - MOVD paramResultSlicePtr+32(FP), R19 - MOVD goAllocatedStackSlicePtr+40(FP), R26 - JMP (R27) - -TEXT ·afterGoFunctionCallEntrypoint(SB), NOSPLIT|NOFRAME, $0-32 - MOVD goCallReturnAddress+0(FP), R20 - MOVD executionContextPtr+8(FP), R0 - MOVD stackPointer+16(FP), R19 - - // Save the current FP(R29), SP and LR(R30) into the wazevo.executionContext (stored in R0). 
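The stores that follow write into fixed slots of wazevo's execution context. The offsets come straight from the MOVD instructions below (16, 24 and 32 bytes from the context pointer held in R0); the struct and field names in this sketch are illustrative stand-ins, not the real wazevoapi definitions.

package main

import (
	"fmt"
	"unsafe"
)

// goEntryFrame is an illustrative stand-in showing only the three slots the
// assembly below touches; the real execution context has many more fields.
type goEntryFrame struct {
	_                    [16]byte // unrelated leading fields (offsets 0..15)
	originalFramePointer uint64   // offset 16: saved FP (R29)
	originalStackPointer uint64   // offset 24: saved SP (via R27)
	goReturnAddress      uint64   // offset 32: saved LR (R30)
}

func main() {
	// The offsets must line up with the 16(R0)/24(R0)/32(R0) stores.
	fmt.Println(unsafe.Offsetof(goEntryFrame{}.originalFramePointer)) // 16
	fmt.Println(unsafe.Offsetof(goEntryFrame{}.originalStackPointer)) // 24
	fmt.Println(unsafe.Offsetof(goEntryFrame{}.goReturnAddress))      // 32
}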
- MOVD R29, 16(R0) // Store FP(R29) into [R0, #ExecutionContextOffsets.OriginalFramePointer]
- MOVD RSP, R27 // Move SP to R27 (temporary register) since SP cannot be stored directly in str instructions.
- MOVD R27, 24(R0) // Store R27 into [R0, #ExecutionContextOffsets.OriginalStackPointer]
- MOVD R30, 32(R0) // Store R30 into [R0, #ExecutionContextOffsets.GoReturnAddress]
-
- // Load the new stack pointer (which sits somewhere in Go-allocated stack) into SP.
- MOVD R19, RSP
- JMP (R20)
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go
deleted file mode 100644
index f8b5d97ac..000000000
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go
+++ /dev/null
@@ -1,233 +0,0 @@
-package arm64
-
-import (
- "github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
- "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
- "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
- "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
-)
-
-// CompileEntryPreamble implements backend.Machine. This assumes the `entrypoint` function (in abi_entry_arm64.s) passes:
-//
-// 1. The first (execution context ptr) and second arguments are already passed in x0 and x1.
-// 2. param/result slice ptr in x19; the pointer to []uint64{} which is used to pass arguments and accept return values.
-// 3. Go-allocated stack slice ptr in x26.
-// 4. Function executable in x24.
-//
-// Also, SP and FP hold correct Go-runtime-based values, and LR is the return address to the Go-side caller.
-func (m *machine) CompileEntryPreamble(signature *ssa.Signature) []byte {
- root := m.constructEntryPreamble(signature)
- m.encode(root)
- return m.compiler.Buf()
-}
-
-var (
- executionContextPtrReg = x0VReg
- // callee-saved regs so that they can be used in the prologue and epilogue.
- paramResultSlicePtr = x19VReg
- savedExecutionContextPtr = x20VReg
- // goAllocatedStackPtr is not used in the epilogue.
- goAllocatedStackPtr = x26VReg
- // paramResultSliceCopied is not used in the epilogue.
- paramResultSliceCopied = x25VReg
- // tmpRegVReg is not used in the epilogue.
- functionExecutable = x24VReg
-)
-
-func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regalloc.VReg, arg *backend.ABIArg, argStartOffsetFromSP int64) *instruction {
- typ := arg.Type
- bits := typ.Bits()
- isStackArg := arg.Kind == backend.ABIArgKindStack
-
- var loadTargetReg operand
- if !isStackArg {
- loadTargetReg = operandNR(arg.Reg)
- } else {
- switch typ {
- case ssa.TypeI32, ssa.TypeI64:
- loadTargetReg = operandNR(x15VReg)
- case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
- loadTargetReg = operandNR(v15VReg)
- default:
- panic("TODO?")
- }
- }
-
- var postIndexImm int64
- if typ == ssa.TypeV128 {
- postIndexImm = 16 // v128 is represented as 2x64-bit in Go slice.
- } else { - postIndexImm = 8 - } - loadMode := m.amodePool.Allocate() - *loadMode = addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm} - - instr := m.allocateInstr() - switch typ { - case ssa.TypeI32: - instr.asULoad(loadTargetReg.reg(), loadMode, 32) - case ssa.TypeI64: - instr.asULoad(loadTargetReg.reg(), loadMode, 64) - case ssa.TypeF32: - instr.asFpuLoad(loadTargetReg.reg(), loadMode, 32) - case ssa.TypeF64: - instr.asFpuLoad(loadTargetReg.reg(), loadMode, 64) - case ssa.TypeV128: - instr.asFpuLoad(loadTargetReg.reg(), loadMode, 128) - } - cur = linkInstr(cur, instr) - - if isStackArg { - var storeMode *addressMode - cur, storeMode = m.resolveAddressModeForOffsetAndInsert(cur, argStartOffsetFromSP+arg.Offset, bits, spVReg, true) - toStack := m.allocateInstr() - toStack.asStore(loadTargetReg, storeMode, bits) - cur = linkInstr(cur, toStack) - } - return cur -} - -func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr regalloc.VReg, result *backend.ABIArg, resultStartOffsetFromSP int64) *instruction { - isStackArg := result.Kind == backend.ABIArgKindStack - typ := result.Type - bits := typ.Bits() - - var storeTargetReg operand - if !isStackArg { - storeTargetReg = operandNR(result.Reg) - } else { - switch typ { - case ssa.TypeI32, ssa.TypeI64: - storeTargetReg = operandNR(x15VReg) - case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - storeTargetReg = operandNR(v15VReg) - default: - panic("TODO?") - } - } - - var postIndexImm int64 - if typ == ssa.TypeV128 { - postIndexImm = 16 // v128 is represented as 2x64-bit in Go slice. - } else { - postIndexImm = 8 - } - - if isStackArg { - var loadMode *addressMode - cur, loadMode = m.resolveAddressModeForOffsetAndInsert(cur, resultStartOffsetFromSP+result.Offset, bits, spVReg, true) - toReg := m.allocateInstr() - switch typ { - case ssa.TypeI32, ssa.TypeI64: - toReg.asULoad(storeTargetReg.reg(), loadMode, bits) - case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - toReg.asFpuLoad(storeTargetReg.reg(), loadMode, bits) - default: - panic("TODO?") - } - cur = linkInstr(cur, toReg) - } - - mode := m.amodePool.Allocate() - *mode = addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm} - instr := m.allocateInstr() - instr.asStore(storeTargetReg, mode, bits) - cur = linkInstr(cur, instr) - return cur -} - -func (m *machine) constructEntryPreamble(sig *ssa.Signature) (root *instruction) { - abi := backend.FunctionABI{} - abi.Init(sig, intParamResultRegs, floatParamResultRegs) - - root = m.allocateNop() - - //// ----------------------------------- prologue ----------------------------------- //// - - // First, we save executionContextPtrReg into a callee-saved register so that it can be used in epilogue as well. - // mov savedExecutionContextPtr, x0 - cur := m.move64(savedExecutionContextPtr, executionContextPtrReg, root) - - // Next, save the current FP, SP and LR into the wazevo.executionContext: - // str fp, [savedExecutionContextPtr, #OriginalFramePointer] - // mov tmp, sp ;; sp cannot be str'ed directly. 
- // str sp, [savedExecutionContextPtr, #OriginalStackPointer] - // str lr, [savedExecutionContextPtr, #GoReturnAddress] - cur = m.loadOrStoreAtExecutionContext(fpVReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, true, cur) - cur = m.move64(tmpRegVReg, spVReg, cur) - cur = m.loadOrStoreAtExecutionContext(tmpRegVReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, true, cur) - cur = m.loadOrStoreAtExecutionContext(lrVReg, wazevoapi.ExecutionContextOffsetGoReturnAddress, true, cur) - - // Then, move the Go-allocated stack pointer to SP: - // mov sp, goAllocatedStackPtr - cur = m.move64(spVReg, goAllocatedStackPtr, cur) - - prReg := paramResultSlicePtr - if len(abi.Args) > 2 && len(abi.Rets) > 0 { - // paramResultSlicePtr is modified during the execution of goEntryPreamblePassArg, - // so copy it to another reg. - cur = m.move64(paramResultSliceCopied, paramResultSlicePtr, cur) - prReg = paramResultSliceCopied - } - - stackSlotSize := int64(abi.AlignedArgResultStackSlotSize()) - for i := range abi.Args { - if i < 2 { - // module context ptr and execution context ptr are passed in x0 and x1 by the Go assembly function. - continue - } - arg := &abi.Args[i] - cur = m.goEntryPreamblePassArg(cur, prReg, arg, -stackSlotSize) - } - - // Call the real function. - bl := m.allocateInstr() - bl.asCallIndirect(functionExecutable, &abi) - cur = linkInstr(cur, bl) - - ///// ----------------------------------- epilogue ----------------------------------- ///// - - // Store the register results into paramResultSlicePtr. - for i := range abi.Rets { - cur = m.goEntryPreamblePassResult(cur, paramResultSlicePtr, &abi.Rets[i], abi.ArgStackSize-stackSlotSize) - } - - // Finally, restore the FP, SP and LR, and return to the Go code. - // ldr fp, [savedExecutionContextPtr, #OriginalFramePointer] - // ldr tmp, [savedExecutionContextPtr, #OriginalStackPointer] - // mov sp, tmp ;; sp cannot be str'ed directly. 
- // ldr lr, [savedExecutionContextPtr, #GoReturnAddress] - // ret ;; --> return to the Go code - cur = m.loadOrStoreAtExecutionContext(fpVReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, false, cur) - cur = m.loadOrStoreAtExecutionContext(tmpRegVReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, false, cur) - cur = m.move64(spVReg, tmpRegVReg, cur) - cur = m.loadOrStoreAtExecutionContext(lrVReg, wazevoapi.ExecutionContextOffsetGoReturnAddress, false, cur) - retInst := m.allocateInstr() - retInst.asRet() - linkInstr(cur, retInst) - return -} - -func (m *machine) move64(dst, src regalloc.VReg, prev *instruction) *instruction { - instr := m.allocateInstr() - instr.asMove64(dst, src) - return linkInstr(prev, instr) -} - -func (m *machine) loadOrStoreAtExecutionContext(d regalloc.VReg, offset wazevoapi.Offset, store bool, prev *instruction) *instruction { - instr := m.allocateInstr() - mode := m.amodePool.Allocate() - *mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()} - if store { - instr.asStore(operandNR(d), mode, 64) - } else { - instr.asULoad(d, mode, 64) - } - return linkInstr(prev, instr) -} - -func linkInstr(prev, next *instruction) *instruction { - prev.next = next - next.prev = prev - return next -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go deleted file mode 100644 index 06f8a4a05..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go +++ /dev/null @@ -1,430 +0,0 @@ -package arm64 - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -var calleeSavedRegistersSorted = []regalloc.VReg{ - x19VReg, x20VReg, x21VReg, x22VReg, x23VReg, x24VReg, x25VReg, x26VReg, x28VReg, - v18VReg, v19VReg, v20VReg, v21VReg, v22VReg, v23VReg, v24VReg, v25VReg, v26VReg, v27VReg, v28VReg, v29VReg, v30VReg, v31VReg, -} - -// CompileGoFunctionTrampoline implements backend.Machine. -func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte { - argBegin := 1 // Skips exec context by default. - if needModuleContextPtr { - argBegin++ - } - - abi := &backend.FunctionABI{} - abi.Init(sig, intParamResultRegs, floatParamResultRegs) - m.currentABI = abi - - cur := m.allocateInstr() - cur.asNop0() - m.rootInstr = cur - - // Execution context is always the first argument. - execCtrPtr := x0VReg - - // In the following, we create the following stack layout: - // - // (high address) - // SP ------> +-----------------+ <----+ - // | ....... | | - // | ret Y | | - // | ....... | | - // | ret 0 | | - // | arg X | | size_of_arg_ret - // | ....... | | - // | arg 1 | | - // | arg 0 | <----+ <-------- originalArg0Reg - // | size_of_arg_ret | - // | ReturnAddress | - // +-----------------+ <----+ - // | xxxx | | ;; might be padded to make it 16-byte aligned. - // +--->| arg[N]/ret[M] | | - // sliceSize| | ............ | | goCallStackSize - // | | arg[1]/ret[1] | | - // +--->| arg[0]/ret[0] | <----+ <-------- arg0ret0AddrReg - // | sliceSize | - // | frame_size | - // +-----------------+ - // (low address) - // - // where the region of "arg[0]/ret[0] ... 
arg[N]/ret[M]" is the stack used by the Go functions, - // therefore will be accessed as the usual []uint64. So that's where we need to pass/receive - // the arguments/return values. - - // First of all, to update the SP, and create "ReturnAddress + size_of_arg_ret". - cur = m.createReturnAddrAndSizeOfArgRetSlot(cur) - - const frameInfoSize = 16 // == frame_size + sliceSize. - - // Next, we should allocate the stack for the Go function call if necessary. - goCallStackSize, sliceSizeInBytes := backend.GoFunctionCallRequiredStackSize(sig, argBegin) - cur = m.insertStackBoundsCheck(goCallStackSize+frameInfoSize, cur) - - originalArg0Reg := x17VReg // Caller save, so we can use it for whatever we want. - if m.currentABI.AlignedArgResultStackSlotSize() > 0 { - // At this point, SP points to `ReturnAddress`, so add 16 to get the original arg 0 slot. - cur = m.addsAddOrSubStackPointer(cur, originalArg0Reg, frameInfoSize, true) - } - - // Save the callee saved registers. - cur = m.saveRegistersInExecutionContext(cur, calleeSavedRegistersSorted) - - if needModuleContextPtr { - offset := wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.I64() - if !offsetFitsInAddressModeKindRegUnsignedImm12(64, offset) { - panic("BUG: too large or un-aligned offset for goFunctionCallCalleeModuleContextOpaque in execution context") - } - - // Module context is always the second argument. - moduleCtrPtr := x1VReg - store := m.allocateInstr() - amode := m.amodePool.Allocate() - *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset} - store.asStore(operandNR(moduleCtrPtr), amode, 64) - cur = linkInstr(cur, store) - } - - // Advances the stack pointer. - cur = m.addsAddOrSubStackPointer(cur, spVReg, goCallStackSize, false) - - // Copy the pointer to x15VReg. - arg0ret0AddrReg := x15VReg // Caller save, so we can use it for whatever we want. - copySp := m.allocateInstr() - copySp.asMove64(arg0ret0AddrReg, spVReg) - cur = linkInstr(cur, copySp) - - // Next, we need to store all the arguments to the stack in the typical Wasm stack style. - for i := range abi.Args[argBegin:] { - arg := &abi.Args[argBegin+i] - store := m.allocateInstr() - var v regalloc.VReg - if arg.Kind == backend.ABIArgKindReg { - v = arg.Reg - } else { - cur, v = m.goFunctionCallLoadStackArg(cur, originalArg0Reg, arg, - // Caller save, so we can use it for whatever we want. - x11VReg, v11VReg) - } - - var sizeInBits byte - if arg.Type == ssa.TypeV128 { - sizeInBits = 128 - } else { - sizeInBits = 64 - } - amode := m.amodePool.Allocate() - *amode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8)} - store.asStore(operandNR(v), amode, sizeInBits) - cur = linkInstr(cur, store) - } - - // Finally, now that we've advanced SP to arg[0]/ret[0], we allocate `frame_size + sliceSize`. - var frameSizeReg, sliceSizeReg regalloc.VReg - if goCallStackSize > 0 { - cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, goCallStackSize) - frameSizeReg = tmpRegVReg - cur = m.lowerConstantI64AndInsert(cur, x16VReg, sliceSizeInBytes/8) - sliceSizeReg = x16VReg - } else { - frameSizeReg = xzrVReg - sliceSizeReg = xzrVReg - } - _amode := addressModePreOrPostIndex(m, spVReg, -16, true) - storeP := m.allocateInstr() - storeP.asStorePair64(frameSizeReg, sliceSizeReg, _amode) - cur = linkInstr(cur, storeP) - - // Set the exit status on the execution context. - cur = m.setExitCode(cur, x0VReg, exitCode) - - // Save the current stack pointer. 
- cur = m.saveCurrentStackPointer(cur, x0VReg) - - // Exit the execution. - cur = m.storeReturnAddressAndExit(cur) - - // After the call, we need to restore the callee saved registers. - cur = m.restoreRegistersInExecutionContext(cur, calleeSavedRegistersSorted) - - // Get the pointer to the arg[0]/ret[0]: We need to skip `frame_size + sliceSize`. - if len(abi.Rets) > 0 { - cur = m.addsAddOrSubStackPointer(cur, arg0ret0AddrReg, frameInfoSize, true) - } - - // Advances the SP so that it points to `ReturnAddress`. - cur = m.addsAddOrSubStackPointer(cur, spVReg, frameInfoSize+goCallStackSize, true) - ldr := m.allocateInstr() - // And load the return address. - amode := addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */) - ldr.asULoad(lrVReg, amode, 64) - cur = linkInstr(cur, ldr) - - originalRet0Reg := x17VReg // Caller save, so we can use it for whatever we want. - if m.currentABI.RetStackSize > 0 { - cur = m.addsAddOrSubStackPointer(cur, originalRet0Reg, m.currentABI.ArgStackSize, true) - } - - // Make the SP point to the original address (above the result slot). - if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 { - cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true) - } - - for i := range abi.Rets { - r := &abi.Rets[i] - if r.Kind == backend.ABIArgKindReg { - loadIntoReg := m.allocateInstr() - mode := m.amodePool.Allocate() - *mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} - switch r.Type { - case ssa.TypeI32: - mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asULoad(r.Reg, mode, 32) - case ssa.TypeI64: - mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asULoad(r.Reg, mode, 64) - case ssa.TypeF32: - mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asFpuLoad(r.Reg, mode, 32) - case ssa.TypeF64: - mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asFpuLoad(r.Reg, mode, 64) - case ssa.TypeV128: - mode.imm = 16 - loadIntoReg.asFpuLoad(r.Reg, mode, 128) - default: - panic("TODO") - } - cur = linkInstr(cur, loadIntoReg) - } else { - // First we need to load the value to a temporary just like ^^. - intTmp, floatTmp := x11VReg, v11VReg - loadIntoTmpReg := m.allocateInstr() - mode := m.amodePool.Allocate() - *mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} - var resultReg regalloc.VReg - switch r.Type { - case ssa.TypeI32: - mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asULoad(intTmp, mode, 32) - resultReg = intTmp - case ssa.TypeI64: - mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asULoad(intTmp, mode, 64) - resultReg = intTmp - case ssa.TypeF32: - mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asFpuLoad(floatTmp, mode, 32) - resultReg = floatTmp - case ssa.TypeF64: - mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. 
- loadIntoTmpReg.asFpuLoad(floatTmp, mode, 64) - resultReg = floatTmp - case ssa.TypeV128: - mode.imm = 16 - loadIntoTmpReg.asFpuLoad(floatTmp, mode, 128) - resultReg = floatTmp - default: - panic("TODO") - } - cur = linkInstr(cur, loadIntoTmpReg) - cur = m.goFunctionCallStoreStackResult(cur, originalRet0Reg, r, resultReg) - } - } - - ret := m.allocateInstr() - ret.asRet() - linkInstr(cur, ret) - - m.encode(m.rootInstr) - return m.compiler.Buf() -} - -func (m *machine) saveRegistersInExecutionContext(cur *instruction, regs []regalloc.VReg) *instruction { - offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64() - for _, v := range regs { - store := m.allocateInstr() - var sizeInBits byte - switch v.RegType() { - case regalloc.RegTypeInt: - sizeInBits = 64 - case regalloc.RegTypeFloat: - sizeInBits = 128 - } - mode := m.amodePool.Allocate() - *mode = addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: offset, - } - store.asStore(operandNR(v), mode, sizeInBits) - store.prev = cur - cur.next = store - cur = store - offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally store regs at the offset of multiple of 16. - } - return cur -} - -func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []regalloc.VReg) *instruction { - offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64() - for _, v := range regs { - load := m.allocateInstr() - var as func(dst regalloc.VReg, amode *addressMode, sizeInBits byte) - var sizeInBits byte - switch v.RegType() { - case regalloc.RegTypeInt: - as = load.asULoad - sizeInBits = 64 - case regalloc.RegTypeFloat: - as = load.asFpuLoad - sizeInBits = 128 - } - mode := m.amodePool.Allocate() - *mode = addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: offset, - } - as(v, mode, sizeInBits) - cur = linkInstr(cur, load) - offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally load regs at the offset of multiple of 16. - } - return cur -} - -func (m *machine) lowerConstantI64AndInsert(cur *instruction, dst regalloc.VReg, v int64) *instruction { - m.pendingInstructions = m.pendingInstructions[:0] - m.lowerConstantI64(dst, v) - for _, instr := range m.pendingInstructions { - cur = linkInstr(cur, instr) - } - return cur -} - -func (m *machine) lowerConstantI32AndInsert(cur *instruction, dst regalloc.VReg, v int32) *instruction { - m.pendingInstructions = m.pendingInstructions[:0] - m.lowerConstantI32(dst, v) - for _, instr := range m.pendingInstructions { - cur = linkInstr(cur, instr) - } - return cur -} - -func (m *machine) setExitCode(cur *instruction, execCtr regalloc.VReg, exitCode wazevoapi.ExitCode) *instruction { - constReg := x17VReg // caller-saved, so we can use it. - cur = m.lowerConstantI32AndInsert(cur, constReg, int32(exitCode)) - - // Set the exit status on the execution context. - setExistStatus := m.allocateInstr() - mode := m.amodePool.Allocate() - *mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64()} - setExistStatus.asStore(operandNR(constReg), mode, 32) - cur = linkInstr(cur, setExistStatus) - return cur -} - -func (m *machine) storeReturnAddressAndExit(cur *instruction) *instruction { - // Read the return address into tmp, and store it in the execution context. 
- adr := m.allocateInstr() - adr.asAdr(tmpRegVReg, exitSequenceSize+8) - cur = linkInstr(cur, adr) - - storeReturnAddr := m.allocateInstr() - mode := m.amodePool.Allocate() - *mode = addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), - } - storeReturnAddr.asStore(operandNR(tmpRegVReg), mode, 64) - cur = linkInstr(cur, storeReturnAddr) - - // Exit the execution. - trapSeq := m.allocateInstr() - trapSeq.asExitSequence(x0VReg) - cur = linkInstr(cur, trapSeq) - return cur -} - -func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VReg) *instruction { - // Save the current stack pointer: - // mov tmp, sp, - // str tmp, [exec_ctx, #stackPointerBeforeGoCall] - movSp := m.allocateInstr() - movSp.asMove64(tmpRegVReg, spVReg) - cur = linkInstr(cur, movSp) - - strSp := m.allocateInstr() - mode := m.amodePool.Allocate() - *mode = addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), - } - strSp.asStore(operandNR(tmpRegVReg), mode, 64) - cur = linkInstr(cur, strSp) - return cur -} - -func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg regalloc.VReg, arg *backend.ABIArg, intVReg, floatVReg regalloc.VReg) (*instruction, regalloc.VReg) { - load := m.allocateInstr() - var result regalloc.VReg - mode := m.amodePool.Allocate() - *mode = addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg} - switch arg.Type { - case ssa.TypeI32: - mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asULoad(intVReg, mode, 32) - result = intVReg - case ssa.TypeI64: - mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asULoad(intVReg, mode, 64) - result = intVReg - case ssa.TypeF32: - mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asFpuLoad(floatVReg, mode, 32) - result = floatVReg - case ssa.TypeF64: - mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. 
- load.asFpuLoad(floatVReg, mode, 64) - result = floatVReg - case ssa.TypeV128: - mode.imm = 16 - load.asFpuLoad(floatVReg, mode, 128) - result = floatVReg - default: - panic("TODO") - } - - cur = linkInstr(cur, load) - return cur, result -} - -func (m *machine) goFunctionCallStoreStackResult(cur *instruction, originalRet0Reg regalloc.VReg, result *backend.ABIArg, resultVReg regalloc.VReg) *instruction { - store := m.allocateInstr() - mode := m.amodePool.Allocate() - *mode = addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg} - var sizeInBits byte - switch result.Type { - case ssa.TypeI32, ssa.TypeF32: - mode.imm = 8 - sizeInBits = 32 - case ssa.TypeI64, ssa.TypeF64: - mode.imm = 8 - sizeInBits = 64 - case ssa.TypeV128: - mode.imm = 16 - sizeInBits = 128 - default: - panic("TODO") - } - store.asStore(operandNR(resultVReg), mode, sizeInBits) - return linkInstr(cur, store) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/cond.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/cond.go deleted file mode 100644 index 6f6cdd1b2..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/cond.go +++ /dev/null @@ -1,215 +0,0 @@ -package arm64 - -import ( - "strconv" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -type ( - cond uint64 - condKind byte -) - -const ( - // condKindRegisterZero represents a condition which checks if the register is zero. - // This indicates that the instruction must be encoded as CBZ: - // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/CBZ--Compare-and-Branch-on-Zero- - condKindRegisterZero condKind = iota - // condKindRegisterNotZero indicates that the instruction must be encoded as CBNZ: - // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/CBNZ--Compare-and-Branch-on-Nonzero- - condKindRegisterNotZero - // condKindCondFlagSet indicates that the instruction must be encoded as B.cond: - // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally- - condKindCondFlagSet -) - -// kind returns the kind of condition which is stored in the first two bits. -func (c cond) kind() condKind { - return condKind(c & 0b11) -} - -func (c cond) asUint64() uint64 { - return uint64(c) -} - -// register returns the register for register conditions. -// This panics if the condition is not a register condition (condKindRegisterZero or condKindRegisterNotZero). -func (c cond) register() regalloc.VReg { - if c.kind() != condKindRegisterZero && c.kind() != condKindRegisterNotZero { - panic("condition is not a register") - } - return regalloc.VReg(c >> 2) -} - -func registerAsRegZeroCond(r regalloc.VReg) cond { - return cond(r)<<2 | cond(condKindRegisterZero) -} - -func registerAsRegNotZeroCond(r regalloc.VReg) cond { - return cond(r)<<2 | cond(condKindRegisterNotZero) -} - -func (c cond) flag() condFlag { - if c.kind() != condKindCondFlagSet { - panic("condition is not a flag") - } - return condFlag(c >> 2) -} - -func (c condFlag) asCond() cond { - return cond(c)<<2 | cond(condKindCondFlagSet) -} - -// condFlag represents a condition flag for conditional branches. -// The value matches the encoding of condition flags in the ARM64 instruction set. 
-// https://developer.arm.com/documentation/den0024/a/The-A64-instruction-set/Data-processing-instructions/Conditional-instructions -type condFlag uint8 - -const ( - eq condFlag = iota // eq represents "equal" - ne // ne represents "not equal" - hs // hs represents "higher or same" - lo // lo represents "lower" - mi // mi represents "minus or negative result" - pl // pl represents "plus or positive result" - vs // vs represents "overflow set" - vc // vc represents "overflow clear" - hi // hi represents "higher" - ls // ls represents "lower or same" - ge // ge represents "greater or equal" - lt // lt represents "less than" - gt // gt represents "greater than" - le // le represents "less than or equal" - al // al represents "always" - nv // nv represents "never" -) - -// invert returns the inverted condition. -func (c condFlag) invert() condFlag { - switch c { - case eq: - return ne - case ne: - return eq - case hs: - return lo - case lo: - return hs - case mi: - return pl - case pl: - return mi - case vs: - return vc - case vc: - return vs - case hi: - return ls - case ls: - return hi - case ge: - return lt - case lt: - return ge - case gt: - return le - case le: - return gt - case al: - return nv - case nv: - return al - default: - panic(c) - } -} - -// String implements fmt.Stringer. -func (c condFlag) String() string { - switch c { - case eq: - return "eq" - case ne: - return "ne" - case hs: - return "hs" - case lo: - return "lo" - case mi: - return "mi" - case pl: - return "pl" - case vs: - return "vs" - case vc: - return "vc" - case hi: - return "hi" - case ls: - return "ls" - case ge: - return "ge" - case lt: - return "lt" - case gt: - return "gt" - case le: - return "le" - case al: - return "al" - case nv: - return "nv" - default: - panic(strconv.Itoa(int(c))) - } -} - -// condFlagFromSSAIntegerCmpCond returns the condition flag for the given ssa.IntegerCmpCond. -func condFlagFromSSAIntegerCmpCond(c ssa.IntegerCmpCond) condFlag { - switch c { - case ssa.IntegerCmpCondEqual: - return eq - case ssa.IntegerCmpCondNotEqual: - return ne - case ssa.IntegerCmpCondSignedLessThan: - return lt - case ssa.IntegerCmpCondSignedGreaterThanOrEqual: - return ge - case ssa.IntegerCmpCondSignedGreaterThan: - return gt - case ssa.IntegerCmpCondSignedLessThanOrEqual: - return le - case ssa.IntegerCmpCondUnsignedLessThan: - return lo - case ssa.IntegerCmpCondUnsignedGreaterThanOrEqual: - return hs - case ssa.IntegerCmpCondUnsignedGreaterThan: - return hi - case ssa.IntegerCmpCondUnsignedLessThanOrEqual: - return ls - default: - panic(c) - } -} - -// condFlagFromSSAFloatCmpCond returns the condition flag for the given ssa.FloatCmpCond. 
-func condFlagFromSSAFloatCmpCond(c ssa.FloatCmpCond) condFlag { - switch c { - case ssa.FloatCmpCondEqual: - return eq - case ssa.FloatCmpCondNotEqual: - return ne - case ssa.FloatCmpCondLessThan: - return mi - case ssa.FloatCmpCondLessThanOrEqual: - return ls - case ssa.FloatCmpCondGreaterThan: - return gt - case ssa.FloatCmpCondGreaterThanOrEqual: - return ge - default: - panic(c) - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go deleted file mode 100644 index 1f563428a..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go +++ /dev/null @@ -1,2534 +0,0 @@ -package arm64 - -import ( - "fmt" - "math" - "unsafe" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -type ( - // instruction represents either a real instruction in arm64, or the meta instructions - // that are convenient for code generation. For example, inline constants are also treated - // as instructions. - // - // Basically, each instruction knows how to get encoded in binaries. Hence, the final output of compilation - // can be considered equivalent to the sequence of such instructions. - // - // Each field is interpreted depending on the kind. - // - // TODO: optimize the layout later once the impl settles. - instruction struct { - prev, next *instruction - u1, u2 uint64 - rd regalloc.VReg - rm, rn operand - kind instructionKind - addedBeforeRegAlloc bool - } - - // instructionKind represents the kind of instruction. - // This controls how the instruction struct is interpreted. - instructionKind byte -) - -// IsCall implements regalloc.Instr IsCall. -func (i *instruction) IsCall() bool { - return i.kind == call -} - -// IsIndirectCall implements regalloc.Instr IsIndirectCall. -func (i *instruction) IsIndirectCall() bool { - return i.kind == callInd -} - -// IsReturn implements regalloc.Instr IsReturn. 
-func (i *instruction) IsReturn() bool { - return i.kind == ret -} - -type defKind byte - -const ( - defKindNone defKind = iota + 1 - defKindRD - defKindCall -) - -var defKinds = [numInstructionKinds]defKind{ - adr: defKindRD, - aluRRR: defKindRD, - aluRRRR: defKindRD, - aluRRImm12: defKindRD, - aluRRBitmaskImm: defKindRD, - aluRRRShift: defKindRD, - aluRRImmShift: defKindRD, - aluRRRExtend: defKindRD, - bitRR: defKindRD, - movZ: defKindRD, - movK: defKindRD, - movN: defKindRD, - mov32: defKindRD, - mov64: defKindRD, - fpuMov64: defKindRD, - fpuMov128: defKindRD, - fpuRR: defKindRD, - fpuRRR: defKindRD, - nop0: defKindNone, - call: defKindCall, - callInd: defKindCall, - ret: defKindNone, - store8: defKindNone, - store16: defKindNone, - store32: defKindNone, - store64: defKindNone, - exitSequence: defKindNone, - condBr: defKindNone, - br: defKindNone, - brTableSequence: defKindNone, - cSet: defKindRD, - extend: defKindRD, - fpuCmp: defKindNone, - uLoad8: defKindRD, - uLoad16: defKindRD, - uLoad32: defKindRD, - sLoad8: defKindRD, - sLoad16: defKindRD, - sLoad32: defKindRD, - uLoad64: defKindRD, - fpuLoad32: defKindRD, - fpuLoad64: defKindRD, - fpuLoad128: defKindRD, - vecLoad1R: defKindRD, - loadFpuConst32: defKindRD, - loadFpuConst64: defKindRD, - loadFpuConst128: defKindRD, - fpuStore32: defKindNone, - fpuStore64: defKindNone, - fpuStore128: defKindNone, - udf: defKindNone, - cSel: defKindRD, - fpuCSel: defKindRD, - movToVec: defKindRD, - movFromVec: defKindRD, - movFromVecSigned: defKindRD, - vecDup: defKindRD, - vecDupElement: defKindRD, - vecExtract: defKindRD, - vecMisc: defKindRD, - vecMovElement: defKindRD, - vecLanes: defKindRD, - vecShiftImm: defKindRD, - vecTbl: defKindRD, - vecTbl2: defKindRD, - vecPermute: defKindRD, - vecRRR: defKindRD, - vecRRRRewrite: defKindNone, - fpuToInt: defKindRD, - intToFpu: defKindRD, - cCmpImm: defKindNone, - movToFPSR: defKindNone, - movFromFPSR: defKindRD, - emitSourceOffsetInfo: defKindNone, - atomicRmw: defKindRD, - atomicCas: defKindNone, - atomicLoad: defKindRD, - atomicStore: defKindNone, - dmb: defKindNone, - loadConstBlockArg: defKindRD, -} - -// Defs returns the list of regalloc.VReg that are defined by the instruction. -// In order to reduce the number of allocations, the caller can pass the slice to be used. -func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg { - *regs = (*regs)[:0] - switch defKinds[i.kind] { - case defKindNone: - case defKindRD: - *regs = append(*regs, i.rd) - case defKindCall: - _, _, retIntRealRegs, retFloatRealRegs, _ := backend.ABIInfoFromUint64(i.u2) - for i := byte(0); i < retIntRealRegs; i++ { - *regs = append(*regs, regInfo.RealRegToVReg[intParamResultRegs[i]]) - } - for i := byte(0); i < retFloatRealRegs; i++ { - *regs = append(*regs, regInfo.RealRegToVReg[floatParamResultRegs[i]]) - } - default: - panic(fmt.Sprintf("defKind for %v not defined", i)) - } - return *regs -} - -// AssignDef implements regalloc.Instr AssignDef. -func (i *instruction) AssignDef(reg regalloc.VReg) { - switch defKinds[i.kind] { - case defKindNone: - case defKindRD: - i.rd = reg - case defKindCall: - panic("BUG: call instructions shouldn't be assigned") - default: - panic(fmt.Sprintf("defKind for %v not defined", i)) - } -} - -type useKind byte - -const ( - useKindNone useKind = iota + 1 - useKindRN - useKindRNRM - useKindRNRMRA - useKindRNRN1RM - useKindCall - useKindCallInd - useKindAMode - useKindRNAMode - useKindCond - // useKindRDRewrite indicates an instruction where RD is used both as a source and destination. 
- // A temporary register for RD must be allocated explicitly with the source copied to this - // register before the instruction and the value copied from this register to the instruction - // return register. - useKindRDRewrite -) - -var useKinds = [numInstructionKinds]useKind{ - udf: useKindNone, - aluRRR: useKindRNRM, - aluRRRR: useKindRNRMRA, - aluRRImm12: useKindRN, - aluRRBitmaskImm: useKindRN, - aluRRRShift: useKindRNRM, - aluRRImmShift: useKindRN, - aluRRRExtend: useKindRNRM, - bitRR: useKindRN, - movZ: useKindNone, - movK: useKindNone, - movN: useKindNone, - mov32: useKindRN, - mov64: useKindRN, - fpuMov64: useKindRN, - fpuMov128: useKindRN, - fpuRR: useKindRN, - fpuRRR: useKindRNRM, - nop0: useKindNone, - call: useKindCall, - callInd: useKindCallInd, - ret: useKindNone, - store8: useKindRNAMode, - store16: useKindRNAMode, - store32: useKindRNAMode, - store64: useKindRNAMode, - exitSequence: useKindRN, - condBr: useKindCond, - br: useKindNone, - brTableSequence: useKindRN, - cSet: useKindNone, - extend: useKindRN, - fpuCmp: useKindRNRM, - uLoad8: useKindAMode, - uLoad16: useKindAMode, - uLoad32: useKindAMode, - sLoad8: useKindAMode, - sLoad16: useKindAMode, - sLoad32: useKindAMode, - uLoad64: useKindAMode, - fpuLoad32: useKindAMode, - fpuLoad64: useKindAMode, - fpuLoad128: useKindAMode, - fpuStore32: useKindRNAMode, - fpuStore64: useKindRNAMode, - fpuStore128: useKindRNAMode, - loadFpuConst32: useKindNone, - loadFpuConst64: useKindNone, - loadFpuConst128: useKindNone, - vecLoad1R: useKindRN, - cSel: useKindRNRM, - fpuCSel: useKindRNRM, - movToVec: useKindRN, - movFromVec: useKindRN, - movFromVecSigned: useKindRN, - vecDup: useKindRN, - vecDupElement: useKindRN, - vecExtract: useKindRNRM, - cCmpImm: useKindRN, - vecMisc: useKindRN, - vecMovElement: useKindRN, - vecLanes: useKindRN, - vecShiftImm: useKindRN, - vecTbl: useKindRNRM, - vecTbl2: useKindRNRN1RM, - vecRRR: useKindRNRM, - vecRRRRewrite: useKindRDRewrite, - vecPermute: useKindRNRM, - fpuToInt: useKindRN, - intToFpu: useKindRN, - movToFPSR: useKindRN, - movFromFPSR: useKindNone, - adr: useKindNone, - emitSourceOffsetInfo: useKindNone, - atomicRmw: useKindRNRM, - atomicCas: useKindRDRewrite, - atomicLoad: useKindRN, - atomicStore: useKindRNRM, - loadConstBlockArg: useKindNone, - dmb: useKindNone, -} - -// Uses returns the list of regalloc.VReg that are used by the instruction. -// In order to reduce the number of allocations, the caller can pass the slice to be used. 
-func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { - *regs = (*regs)[:0] - switch useKinds[i.kind] { - case useKindNone: - case useKindRN: - if rn := i.rn.reg(); rn.Valid() { - *regs = append(*regs, rn) - } - case useKindRNRM: - if rn := i.rn.reg(); rn.Valid() { - *regs = append(*regs, rn) - } - if rm := i.rm.reg(); rm.Valid() { - *regs = append(*regs, rm) - } - case useKindRNRMRA: - if rn := i.rn.reg(); rn.Valid() { - *regs = append(*regs, rn) - } - if rm := i.rm.reg(); rm.Valid() { - *regs = append(*regs, rm) - } - if ra := regalloc.VReg(i.u2); ra.Valid() { - *regs = append(*regs, ra) - } - case useKindRNRN1RM: - if rn := i.rn.reg(); rn.Valid() && rn.IsRealReg() { - rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType()) - *regs = append(*regs, rn, rn1) - } - if rm := i.rm.reg(); rm.Valid() { - *regs = append(*regs, rm) - } - case useKindAMode: - amode := i.getAmode() - if amodeRN := amode.rn; amodeRN.Valid() { - *regs = append(*regs, amodeRN) - } - if amodeRM := amode.rm; amodeRM.Valid() { - *regs = append(*regs, amodeRM) - } - case useKindRNAMode: - *regs = append(*regs, i.rn.reg()) - amode := i.getAmode() - if amodeRN := amode.rn; amodeRN.Valid() { - *regs = append(*regs, amodeRN) - } - if amodeRM := amode.rm; amodeRM.Valid() { - *regs = append(*regs, amodeRM) - } - case useKindCond: - cnd := cond(i.u1) - if cnd.kind() != condKindCondFlagSet { - *regs = append(*regs, cnd.register()) - } - case useKindCallInd: - *regs = append(*regs, i.rn.nr()) - fallthrough - case useKindCall: - argIntRealRegs, argFloatRealRegs, _, _, _ := backend.ABIInfoFromUint64(i.u2) - for i := byte(0); i < argIntRealRegs; i++ { - *regs = append(*regs, regInfo.RealRegToVReg[intParamResultRegs[i]]) - } - for i := byte(0); i < argFloatRealRegs; i++ { - *regs = append(*regs, regInfo.RealRegToVReg[floatParamResultRegs[i]]) - } - case useKindRDRewrite: - *regs = append(*regs, i.rn.reg()) - *regs = append(*regs, i.rm.reg()) - *regs = append(*regs, i.rd) - default: - panic(fmt.Sprintf("useKind for %v not defined", i)) - } - return *regs -} - -func (i *instruction) AssignUse(index int, reg regalloc.VReg) { - switch useKinds[i.kind] { - case useKindNone: - case useKindRN: - if rn := i.rn.reg(); rn.Valid() { - i.rn = i.rn.assignReg(reg) - } - case useKindRNRM: - if index == 0 { - if rn := i.rn.reg(); rn.Valid() { - i.rn = i.rn.assignReg(reg) - } - } else { - if rm := i.rm.reg(); rm.Valid() { - i.rm = i.rm.assignReg(reg) - } - } - case useKindRDRewrite: - if index == 0 { - if rn := i.rn.reg(); rn.Valid() { - i.rn = i.rn.assignReg(reg) - } - } else if index == 1 { - if rm := i.rm.reg(); rm.Valid() { - i.rm = i.rm.assignReg(reg) - } - } else { - if rd := i.rd; rd.Valid() { - i.rd = reg - } - } - case useKindRNRN1RM: - if index == 0 { - if rn := i.rn.reg(); rn.Valid() { - i.rn = i.rn.assignReg(reg) - } - if rn1 := i.rn.reg() + 1; rn1.Valid() { - i.rm = i.rm.assignReg(reg + 1) - } - } else { - if rm := i.rm.reg(); rm.Valid() { - i.rm = i.rm.assignReg(reg) - } - } - case useKindRNRMRA: - if index == 0 { - if rn := i.rn.reg(); rn.Valid() { - i.rn = i.rn.assignReg(reg) - } - } else if index == 1 { - if rm := i.rm.reg(); rm.Valid() { - i.rm = i.rm.assignReg(reg) - } - } else { - if ra := regalloc.VReg(i.u2); ra.Valid() { - i.u2 = uint64(reg) - } - } - case useKindAMode: - if index == 0 { - amode := i.getAmode() - if amodeRN := amode.rn; amodeRN.Valid() { - amode.rn = reg - } - } else { - amode := i.getAmode() - if amodeRM := amode.rm; amodeRM.Valid() { - amode.rm = reg - } - } - case useKindRNAMode: - if index == 
0 { - i.rn = i.rn.assignReg(reg) - } else if index == 1 { - amode := i.getAmode() - if amodeRN := amode.rn; amodeRN.Valid() { - amode.rn = reg - } else { - panic("BUG") - } - } else { - amode := i.getAmode() - if amodeRM := amode.rm; amodeRM.Valid() { - amode.rm = reg - } else { - panic("BUG") - } - } - case useKindCond: - c := cond(i.u1) - switch c.kind() { - case condKindRegisterZero: - i.u1 = uint64(registerAsRegZeroCond(reg)) - case condKindRegisterNotZero: - i.u1 = uint64(registerAsRegNotZeroCond(reg)) - } - case useKindCall: - panic("BUG: call instructions shouldn't be assigned") - case useKindCallInd: - i.rn = i.rn.assignReg(reg) - default: - panic(fmt.Sprintf("useKind for %v not defined", i)) - } -} - -func (i *instruction) asCall(ref ssa.FuncRef, abi *backend.FunctionABI) { - i.kind = call - i.u1 = uint64(ref) - if abi != nil { - i.u2 = abi.ABIInfoAsUint64() - } -} - -func (i *instruction) asCallIndirect(ptr regalloc.VReg, abi *backend.FunctionABI) { - i.kind = callInd - i.rn = operandNR(ptr) - if abi != nil { - i.u2 = abi.ABIInfoAsUint64() - } -} - -func (i *instruction) callFuncRef() ssa.FuncRef { - return ssa.FuncRef(i.u1) -} - -// shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) { - i.kind = movZ - i.rd = dst - i.u1 = imm - i.u2 = uint64(shift) - if dst64bit { - i.u2 |= 1 << 32 - } -} - -// shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) { - i.kind = movK - i.rd = dst - i.u1 = imm - i.u2 = uint64(shift) - if dst64bit { - i.u2 |= 1 << 32 - } -} - -// shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) { - i.kind = movN - i.rd = dst - i.u1 = imm - i.u2 = uint64(shift) - if dst64bit { - i.u2 |= 1 << 32 - } -} - -func (i *instruction) asNop0() *instruction { - i.kind = nop0 - return i -} - -func (i *instruction) asNop0WithLabel(l label) { - i.kind = nop0 - i.u1 = uint64(l) -} - -func (i *instruction) nop0Label() label { - return label(i.u1) -} - -func (i *instruction) asRet() { - i.kind = ret -} - -func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode *addressMode) { - i.kind = storeP64 - i.rn = operandNR(src1) - i.rm = operandNR(src2) - i.setAmode(amode) -} - -func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode *addressMode) { - i.kind = loadP64 - i.rn = operandNR(src1) - i.rm = operandNR(src2) - i.setAmode(amode) -} - -func (i *instruction) asStore(src operand, amode *addressMode, sizeInBits byte) { - switch sizeInBits { - case 8: - i.kind = store8 - case 16: - i.kind = store16 - case 32: - if src.reg().RegType() == regalloc.RegTypeInt { - i.kind = store32 - } else { - i.kind = fpuStore32 - } - case 64: - if src.reg().RegType() == regalloc.RegTypeInt { - i.kind = store64 - } else { - i.kind = fpuStore64 - } - case 128: - i.kind = fpuStore128 - } - i.rn = src - i.setAmode(amode) -} - -func (i *instruction) asSLoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) { - switch sizeInBits { - case 8: - i.kind = sLoad8 - case 16: - i.kind = sLoad16 - case 32: - i.kind = sLoad32 - default: - panic("BUG") - } - i.rd = dst - i.setAmode(amode) -} - -func (i *instruction) asULoad(dst regalloc.VReg, amode *addressMode, 
sizeInBits byte) { - switch sizeInBits { - case 8: - i.kind = uLoad8 - case 16: - i.kind = uLoad16 - case 32: - i.kind = uLoad32 - case 64: - i.kind = uLoad64 - } - i.rd = dst - i.setAmode(amode) -} - -func (i *instruction) asFpuLoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) { - switch sizeInBits { - case 32: - i.kind = fpuLoad32 - case 64: - i.kind = fpuLoad64 - case 128: - i.kind = fpuLoad128 - } - i.rd = dst - i.setAmode(amode) -} - -func (i *instruction) getAmode() *addressMode { - return wazevoapi.PtrFromUintptr[addressMode](uintptr(i.u1)) -} - -func (i *instruction) setAmode(a *addressMode) { - i.u1 = uint64(uintptr(unsafe.Pointer(a))) -} - -func (i *instruction) asVecLoad1R(rd regalloc.VReg, rn operand, arr vecArrangement) { - // NOTE: currently only has support for no-offset loads, though it is suspicious that - // we would need to support offset load (that is only available for post-index). - i.kind = vecLoad1R - i.rd = rd - i.rn = rn - i.u1 = uint64(arr) -} - -func (i *instruction) asCSet(rd regalloc.VReg, mask bool, c condFlag) { - i.kind = cSet - i.rd = rd - i.u1 = uint64(c) - if mask { - i.u2 = 1 - } -} - -func (i *instruction) asCSel(rd regalloc.VReg, rn, rm operand, c condFlag, _64bit bool) { - i.kind = cSel - i.rd = rd - i.rn = rn - i.rm = rm - i.u1 = uint64(c) - if _64bit { - i.u2 = 1 - } -} - -func (i *instruction) asFpuCSel(rd regalloc.VReg, rn, rm operand, c condFlag, _64bit bool) { - i.kind = fpuCSel - i.rd = rd - i.rn = rn - i.rm = rm - i.u1 = uint64(c) - if _64bit { - i.u2 = 1 - } -} - -func (i *instruction) asBr(target label) { - if target == labelReturn { - panic("BUG: call site should special case for returnLabel") - } - i.kind = br - i.u1 = uint64(target) -} - -func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targetIndex, targetCounts int) { - i.kind = brTableSequence - i.rn = operandNR(indexReg) - i.u1 = uint64(targetIndex) - i.u2 = uint64(targetCounts) -} - -func (i *instruction) brTableSequenceOffsetsResolved() { - i.rm.data = 1 // indicate that the offsets are resolved, for debugging. -} - -func (i *instruction) brLabel() label { - return label(i.u1) -} - -// brOffsetResolved is called when the target label is resolved. -func (i *instruction) brOffsetResolve(offset int64) { - i.u2 = uint64(offset) - i.rm.data = 1 // indicate that the offset is resolved, for debugging. -} - -func (i *instruction) brOffset() int64 { - return int64(i.u2) -} - -// asCondBr encodes a conditional branch instruction. is64bit is only needed when cond is not flag. -func (i *instruction) asCondBr(c cond, target label, is64bit bool) { - i.kind = condBr - i.u1 = c.asUint64() - i.u2 = uint64(target) - if is64bit { - i.u2 |= 1 << 32 - } -} - -func (i *instruction) setCondBrTargets(target label) { - i.u2 = uint64(target) -} - -func (i *instruction) condBrLabel() label { - return label(i.u2) -} - -// condBrOffsetResolve is called when the target label is resolved. -func (i *instruction) condBrOffsetResolve(offset int64) { - i.rn.data = uint64(offset) - i.rn.data2 = 1 // indicate that the offset is resolved, for debugging. -} - -// condBrOffsetResolved returns true if condBrOffsetResolve is already called. 
-func (i *instruction) condBrOffsetResolved() bool { - return i.rn.data2 == 1 -} - -func (i *instruction) condBrOffset() int64 { - return int64(i.rn.data) -} - -func (i *instruction) condBrCond() cond { - return cond(i.u1) -} - -func (i *instruction) condBr64bit() bool { - return i.u2&(1<<32) != 0 -} - -func (i *instruction) asLoadFpuConst32(rd regalloc.VReg, raw uint64) { - i.kind = loadFpuConst32 - i.u1 = raw - i.rd = rd -} - -func (i *instruction) asLoadFpuConst64(rd regalloc.VReg, raw uint64) { - i.kind = loadFpuConst64 - i.u1 = raw - i.rd = rd -} - -func (i *instruction) asLoadFpuConst128(rd regalloc.VReg, lo, hi uint64) { - i.kind = loadFpuConst128 - i.u1 = lo - i.u2 = hi - i.rd = rd -} - -func (i *instruction) asFpuCmp(rn, rm operand, is64bit bool) { - i.kind = fpuCmp - i.rn, i.rm = rn, rm - if is64bit { - i.u1 = 1 - } -} - -func (i *instruction) asCCmpImm(rn operand, imm uint64, c condFlag, flag byte, is64bit bool) { - i.kind = cCmpImm - i.rn = rn - i.rm.data = imm - i.u1 = uint64(c) - i.u2 = uint64(flag) - if is64bit { - i.u2 |= 1 << 32 - } -} - -// asALU setups a basic ALU instruction. -func (i *instruction) asALU(aluOp aluOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) { - switch rm.kind { - case operandKindNR: - i.kind = aluRRR - case operandKindSR: - i.kind = aluRRRShift - case operandKindER: - i.kind = aluRRRExtend - case operandKindImm12: - i.kind = aluRRImm12 - default: - panic("BUG") - } - i.u1 = uint64(aluOp) - i.rd, i.rn, i.rm = rd, rn, rm - if dst64bit { - i.u2 |= 1 << 32 - } -} - -// asALU setups a basic ALU instruction. -func (i *instruction) asALURRRR(aluOp aluOp, rd regalloc.VReg, rn, rm operand, ra regalloc.VReg, dst64bit bool) { - i.kind = aluRRRR - i.u1 = uint64(aluOp) - i.rd, i.rn, i.rm, i.u2 = rd, rn, rm, uint64(ra) - if dst64bit { - i.u1 |= 1 << 32 - } -} - -// asALUShift setups a shift based ALU instruction. -func (i *instruction) asALUShift(aluOp aluOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) { - switch rm.kind { - case operandKindNR: - i.kind = aluRRR // If the shift amount op is a register, then the instruction is encoded as a normal ALU instruction with two register operands. 
- case operandKindShiftImm: - i.kind = aluRRImmShift - default: - panic("BUG") - } - i.u1 = uint64(aluOp) - i.rd, i.rn, i.rm = rd, rn, rm - if dst64bit { - i.u2 |= 1 << 32 - } -} - -func (i *instruction) asALUBitmaskImm(aluOp aluOp, rd, rn regalloc.VReg, imm uint64, dst64bit bool) { - i.kind = aluRRBitmaskImm - i.u1 = uint64(aluOp) - i.rn, i.rd = operandNR(rn), rd - i.u2 = imm - if dst64bit { - i.u1 |= 1 << 32 - } -} - -func (i *instruction) asMovToFPSR(rn regalloc.VReg) { - i.kind = movToFPSR - i.rn = operandNR(rn) -} - -func (i *instruction) asMovFromFPSR(rd regalloc.VReg) { - i.kind = movFromFPSR - i.rd = rd -} - -func (i *instruction) asBitRR(bitOp bitOp, rd, rn regalloc.VReg, is64bit bool) { - i.kind = bitRR - i.rn, i.rd = operandNR(rn), rd - i.u1 = uint64(bitOp) - if is64bit { - i.u2 = 1 - } -} - -func (i *instruction) asFpuRRR(op fpuBinOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) { - i.kind = fpuRRR - i.u1 = uint64(op) - i.rd, i.rn, i.rm = rd, rn, rm - if dst64bit { - i.u2 = 1 - } -} - -func (i *instruction) asFpuRR(op fpuUniOp, rd regalloc.VReg, rn operand, dst64bit bool) { - i.kind = fpuRR - i.u1 = uint64(op) - i.rd, i.rn = rd, rn - if dst64bit { - i.u2 = 1 - } -} - -func (i *instruction) asExtend(rd, rn regalloc.VReg, fromBits, toBits byte, signed bool) { - i.kind = extend - i.rn, i.rd = operandNR(rn), rd - i.u1 = uint64(fromBits) - i.u2 = uint64(toBits) - if signed { - i.u2 |= 1 << 32 - } -} - -func (i *instruction) asMove32(rd, rn regalloc.VReg) { - i.kind = mov32 - i.rn, i.rd = operandNR(rn), rd -} - -func (i *instruction) asMove64(rd, rn regalloc.VReg) *instruction { - i.kind = mov64 - i.rn, i.rd = operandNR(rn), rd - return i -} - -func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) { - i.kind = fpuMov64 - i.rn, i.rd = operandNR(rn), rd -} - -func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) *instruction { - i.kind = fpuMov128 - i.rn, i.rd = operandNR(rn), rd - return i -} - -func (i *instruction) asMovToVec(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex) { - i.kind = movToVec - i.rd = rd - i.rn = rn - i.u1, i.u2 = uint64(arr), uint64(index) -} - -func (i *instruction) asMovFromVec(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex, signed bool) { - if signed { - i.kind = movFromVecSigned - } else { - i.kind = movFromVec - } - i.rd = rd - i.rn = rn - i.u1, i.u2 = uint64(arr), uint64(index) -} - -func (i *instruction) asVecDup(rd regalloc.VReg, rn operand, arr vecArrangement) { - i.kind = vecDup - i.u1 = uint64(arr) - i.rn, i.rd = rn, rd -} - -func (i *instruction) asVecDupElement(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex) { - i.kind = vecDupElement - i.u1 = uint64(arr) - i.rn, i.rd = rn, rd - i.u2 = uint64(index) -} - -func (i *instruction) asVecExtract(rd regalloc.VReg, rn, rm operand, arr vecArrangement, index uint32) { - i.kind = vecExtract - i.u1 = uint64(arr) - i.rn, i.rm, i.rd = rn, rm, rd - i.u2 = uint64(index) -} - -func (i *instruction) asVecMovElement(rd regalloc.VReg, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) { - i.kind = vecMovElement - i.u1 = uint64(arr) - i.u2 = uint64(rdIndex) | uint64(rnIndex)<<32 - i.rn, i.rd = rn, rd -} - -func (i *instruction) asVecMisc(op vecOp, rd regalloc.VReg, rn operand, arr vecArrangement) { - i.kind = vecMisc - i.u1 = uint64(op) - i.rn, i.rd = rn, rd - i.u2 = uint64(arr) -} - -func (i *instruction) asVecLanes(op vecOp, rd regalloc.VReg, rn operand, arr vecArrangement) { - i.kind = vecLanes - i.u1 = uint64(op) - i.rn, i.rd = rn, rd - i.u2 = 
uint64(arr) -} - -func (i *instruction) asVecShiftImm(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) *instruction { - i.kind = vecShiftImm - i.u1 = uint64(op) - i.rn, i.rm, i.rd = rn, rm, rd - i.u2 = uint64(arr) - return i -} - -func (i *instruction) asVecTbl(nregs byte, rd regalloc.VReg, rn, rm operand, arr vecArrangement) { - switch nregs { - case 0, 1: - i.kind = vecTbl - case 2: - i.kind = vecTbl2 - if !rn.reg().IsRealReg() { - panic("rn is not a RealReg") - } - if rn.realReg() == v31 { - panic("rn cannot be v31") - } - default: - panic(fmt.Sprintf("unsupported number of registers %d", nregs)) - } - i.rn, i.rm, i.rd = rn, rm, rd - i.u2 = uint64(arr) -} - -func (i *instruction) asVecPermute(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) { - i.kind = vecPermute - i.u1 = uint64(op) - i.rn, i.rm, i.rd = rn, rm, rd - i.u2 = uint64(arr) -} - -func (i *instruction) asVecRRR(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) *instruction { - i.kind = vecRRR - i.u1 = uint64(op) - i.rn, i.rd, i.rm = rn, rd, rm - i.u2 = uint64(arr) - return i -} - -// asVecRRRRewrite encodes a vector instruction that rewrites the destination register. -// IMPORTANT: the destination register must be already defined before this instruction. -func (i *instruction) asVecRRRRewrite(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) { - i.kind = vecRRRRewrite - i.u1 = uint64(op) - i.rn, i.rd, i.rm = rn, rd, rm - i.u2 = uint64(arr) -} - -func (i *instruction) IsCopy() bool { - op := i.kind - // We do not include mov32 as it is not a copy instruction in the sense that it does not preserve the upper 32 bits, - // and it is only used in the translation of IReduce, not the actual copy indeed. - return op == mov64 || op == fpuMov64 || op == fpuMov128 -} - -// String implements fmt.Stringer. 
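// The output approximates standard AArch64 assembly; for instance, a 64-bit aluRRR add prints
// along the lines of "add x1, x2, x3", and loads/stores print together with their addressing mode.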
-func (i *instruction) String() (str string) { - is64SizeBitToSize := func(v uint64) byte { - if v == 0 { - return 32 - } - return 64 - } - - switch i.kind { - case nop0: - if i.u1 != 0 { - l := label(i.u1) - str = fmt.Sprintf("%s:", l) - } else { - str = "nop0" - } - case aluRRR: - size := is64SizeBitToSize(i.u2 >> 32) - str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), - i.rm.format(size)) - case aluRRRR: - size := is64SizeBitToSize(i.u1 >> 32) - str = fmt.Sprintf("%s %s, %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(regalloc.VReg(i.u2), size)) - case aluRRImm12: - size := is64SizeBitToSize(i.u2 >> 32) - str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) - case aluRRBitmaskImm: - size := is64SizeBitToSize(i.u1 >> 32) - rd, rn := formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size) - if size == 32 { - str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, uint32(i.u2)) - } else { - str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, i.u2) - } - case aluRRImmShift: - size := is64SizeBitToSize(i.u2 >> 32) - str = fmt.Sprintf("%s %s, %s, %#x", - aluOp(i.u1).String(), - formatVRegSized(i.rd, size), - formatVRegSized(i.rn.nr(), size), - i.rm.shiftImm(), - ) - case aluRRRShift: - size := is64SizeBitToSize(i.u2 >> 32) - str = fmt.Sprintf("%s %s, %s, %s", - aluOp(i.u1).String(), - formatVRegSized(i.rd, size), - formatVRegSized(i.rn.nr(), size), - i.rm.format(size), - ) - case aluRRRExtend: - size := is64SizeBitToSize(i.u2 >> 32) - str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd, size), - formatVRegSized(i.rn.nr(), size), - // Regardless of the source size, the register is formatted in 32-bit. 
- i.rm.format(32), - ) - case bitRR: - size := is64SizeBitToSize(i.u2) - str = fmt.Sprintf("%s %s, %s", - bitOp(i.u1), - formatVRegSized(i.rd, size), - formatVRegSized(i.rn.nr(), size), - ) - case uLoad8: - str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) - case sLoad8: - str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) - case uLoad16: - str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) - case sLoad16: - str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) - case uLoad32: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) - case sLoad32: - str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) - case uLoad64: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 64), i.getAmode().format(64)) - case store8: - str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(8)) - case store16: - str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(16)) - case store32: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(32)) - case store64: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.getAmode().format(64)) - case storeP64: - str = fmt.Sprintf("stp %s, %s, %s", - formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.getAmode().format(64)) - case loadP64: - str = fmt.Sprintf("ldp %s, %s, %s", - formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.getAmode().format(64)) - case mov64: - str = fmt.Sprintf("mov %s, %s", - formatVRegSized(i.rd, 64), - formatVRegSized(i.rn.nr(), 64)) - case mov32: - str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd, 32), formatVRegSized(i.rn.nr(), 32)) - case movZ: - size := is64SizeBitToSize(i.u2 >> 32) - str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16) - case movN: - size := is64SizeBitToSize(i.u2 >> 32) - str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16) - case movK: - size := is64SizeBitToSize(i.u2 >> 32) - str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16) - case extend: - fromBits, toBits := byte(i.u1), byte(i.u2) - - var signedStr string - if i.u2>>32 == 1 { - signedStr = "s" - } else { - signedStr = "u" - } - var fromStr string - switch fromBits { - case 8: - fromStr = "b" - case 16: - fromStr = "h" - case 32: - fromStr = "w" - } - str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd, toBits), formatVRegSized(i.rn.nr(), 32)) - case cSel: - size := is64SizeBitToSize(i.u2) - str = fmt.Sprintf("csel %s, %s, %s, %s", - formatVRegSized(i.rd, size), - formatVRegSized(i.rn.nr(), size), - formatVRegSized(i.rm.nr(), size), - condFlag(i.u1), - ) - case cSet: - if i.u2 != 0 { - str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd, 64), condFlag(i.u1)) - } else { - str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd, 64), condFlag(i.u1)) - } - case cCmpImm: - size := is64SizeBitToSize(i.u2 >> 32) - str = fmt.Sprintf("ccmp %s, #%#x, #%#x, %s", - formatVRegSized(i.rn.nr(), size), i.rm.data, - i.u2&0b1111, - condFlag(i.u1)) - case fpuMov64: - str = fmt.Sprintf("mov %s, %s", - formatVRegVec(i.rd, vecArrangement8B, vecIndexNone), - formatVRegVec(i.rn.nr(), vecArrangement8B, vecIndexNone)) - case fpuMov128: - str = fmt.Sprintf("mov %s, %s", - 
formatVRegVec(i.rd, vecArrangement16B, vecIndexNone), - formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone)) - case fpuMovFromVec: - panic("TODO") - case fpuRR: - dstSz := is64SizeBitToSize(i.u2) - srcSz := dstSz - op := fpuUniOp(i.u1) - switch op { - case fpuUniOpCvt32To64: - srcSz = 32 - case fpuUniOpCvt64To32: - srcSz = 64 - } - str = fmt.Sprintf("%s %s, %s", op.String(), - formatVRegSized(i.rd, dstSz), formatVRegSized(i.rn.nr(), srcSz)) - case fpuRRR: - size := is64SizeBitToSize(i.u2) - str = fmt.Sprintf("%s %s, %s, %s", fpuBinOp(i.u1).String(), - formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) - case fpuRRI: - panic("TODO") - case fpuRRRR: - panic("TODO") - case fpuCmp: - size := is64SizeBitToSize(i.u1) - str = fmt.Sprintf("fcmp %s, %s", - formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) - case fpuLoad32: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) - case fpuStore32: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(64)) - case fpuLoad64: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 64), i.getAmode().format(64)) - case fpuStore64: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.getAmode().format(64)) - case fpuLoad128: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 128), i.getAmode().format(64)) - case fpuStore128: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.getAmode().format(64)) - case loadFpuConst32: - str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd, 32), math.Float32frombits(uint32(i.u1))) - case loadFpuConst64: - str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd, 64), math.Float64frombits(i.u1)) - case loadFpuConst128: - str = fmt.Sprintf("ldr %s, #8; b 32; data.v128 %016x %016x", - formatVRegSized(i.rd, 128), i.u1, i.u2) - case fpuToInt: - var op, src, dst string - if signed := i.u1 == 1; signed { - op = "fcvtzs" - } else { - op = "fcvtzu" - } - if src64 := i.u2&1 != 0; src64 { - src = formatVRegWidthVec(i.rn.nr(), vecArrangementD) - } else { - src = formatVRegWidthVec(i.rn.nr(), vecArrangementS) - } - if dst64 := i.u2&2 != 0; dst64 { - dst = formatVRegSized(i.rd, 64) - } else { - dst = formatVRegSized(i.rd, 32) - } - str = fmt.Sprintf("%s %s, %s", op, dst, src) - - case intToFpu: - var op, src, dst string - if signed := i.u1 == 1; signed { - op = "scvtf" - } else { - op = "ucvtf" - } - if src64 := i.u2&1 != 0; src64 { - src = formatVRegSized(i.rn.nr(), 64) - } else { - src = formatVRegSized(i.rn.nr(), 32) - } - if dst64 := i.u2&2 != 0; dst64 { - dst = formatVRegWidthVec(i.rd, vecArrangementD) - } else { - dst = formatVRegWidthVec(i.rd, vecArrangementS) - } - str = fmt.Sprintf("%s %s, %s", op, dst, src) - case fpuCSel: - size := is64SizeBitToSize(i.u2) - str = fmt.Sprintf("fcsel %s, %s, %s, %s", - formatVRegSized(i.rd, size), - formatVRegSized(i.rn.nr(), size), - formatVRegSized(i.rm.nr(), size), - condFlag(i.u1), - ) - case movToVec: - var size byte - arr := vecArrangement(i.u1) - switch arr { - case vecArrangementB, vecArrangementH, vecArrangementS: - size = 32 - case vecArrangementD: - size = 64 - default: - panic("unsupported arrangement " + arr.String()) - } - str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd, arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size)) - case movFromVec, movFromVecSigned: - var size byte - var opcode string - arr := vecArrangement(i.u1) - signed := i.kind == movFromVecSigned - switch 
arr { - case vecArrangementB, vecArrangementH, vecArrangementS: - size = 32 - if signed { - opcode = "smov" - } else { - opcode = "umov" - } - case vecArrangementD: - size = 64 - if signed { - opcode = "smov" - } else { - opcode = "mov" - } - default: - panic("unsupported arrangement " + arr.String()) - } - str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd, size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2))) - case vecDup: - str = fmt.Sprintf("dup %s, %s", - formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone), - formatVRegSized(i.rn.nr(), 64), - ) - case vecDupElement: - arr := vecArrangement(i.u1) - str = fmt.Sprintf("dup %s, %s", - formatVRegVec(i.rd, arr, vecIndexNone), - formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)), - ) - case vecDupFromFpu: - panic("TODO") - case vecExtract: - str = fmt.Sprintf("ext %s, %s, %s, #%d", - formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone), - formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndexNone), - formatVRegVec(i.rm.nr(), vecArrangement(i.u1), vecIndexNone), - uint32(i.u2), - ) - case vecExtend: - panic("TODO") - case vecMovElement: - str = fmt.Sprintf("mov %s, %s", - formatVRegVec(i.rd, vecArrangement(i.u1), vecIndex(i.u2&0xffffffff)), - formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u2>>32)), - ) - case vecMiscNarrow: - panic("TODO") - case vecRRR, vecRRRRewrite: - str = fmt.Sprintf("%s %s, %s, %s", - vecOp(i.u1), - formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone), - formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone), - formatVRegVec(i.rm.nr(), vecArrangement(i.u2), vecIndexNone), - ) - case vecMisc: - vop := vecOp(i.u1) - if vop == vecOpCmeq0 { - str = fmt.Sprintf("cmeq %s, %s, #0", - formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone), - formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) - } else { - str = fmt.Sprintf("%s %s, %s", - vop, - formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone), - formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) - } - case vecLanes: - arr := vecArrangement(i.u2) - var destArr vecArrangement - switch arr { - case vecArrangement8B, vecArrangement16B: - destArr = vecArrangementH - case vecArrangement4H, vecArrangement8H: - destArr = vecArrangementS - case vecArrangement4S: - destArr = vecArrangementD - default: - panic("invalid arrangement " + arr.String()) - } - str = fmt.Sprintf("%s %s, %s", - vecOp(i.u1), - formatVRegWidthVec(i.rd, destArr), - formatVRegVec(i.rn.nr(), arr, vecIndexNone)) - case vecShiftImm: - arr := vecArrangement(i.u2) - str = fmt.Sprintf("%s %s, %s, #%d", - vecOp(i.u1), - formatVRegVec(i.rd, arr, vecIndexNone), - formatVRegVec(i.rn.nr(), arr, vecIndexNone), - i.rm.shiftImm()) - case vecTbl: - arr := vecArrangement(i.u2) - str = fmt.Sprintf("tbl %s, { %s }, %s", - formatVRegVec(i.rd, arr, vecIndexNone), - formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone), - formatVRegVec(i.rm.nr(), arr, vecIndexNone)) - case vecTbl2: - arr := vecArrangement(i.u2) - rd, rn, rm := i.rd, i.rn.nr(), i.rm.nr() - rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType()) - str = fmt.Sprintf("tbl %s, { %s, %s }, %s", - formatVRegVec(rd, arr, vecIndexNone), - formatVRegVec(rn, vecArrangement16B, vecIndexNone), - formatVRegVec(rn1, vecArrangement16B, vecIndexNone), - formatVRegVec(rm, arr, vecIndexNone)) - case vecPermute: - arr := vecArrangement(i.u2) - str = fmt.Sprintf("%s %s, %s, %s", - vecOp(i.u1), - formatVRegVec(i.rd, arr, vecIndexNone), - formatVRegVec(i.rn.nr(), arr, vecIndexNone), - formatVRegVec(i.rm.nr(), arr, 
vecIndexNone)) - case movToFPSR: - str = fmt.Sprintf("msr fpsr, %s", formatVRegSized(i.rn.nr(), 64)) - case movFromFPSR: - str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd, 64)) - case call: - str = fmt.Sprintf("bl %s", ssa.FuncRef(i.u1)) - case callInd: - str = fmt.Sprintf("bl %s", formatVRegSized(i.rn.nr(), 64)) - case ret: - str = "ret" - case br: - target := label(i.u1) - if i.rm.data != 0 { - str = fmt.Sprintf("b #%#x (%s)", i.brOffset(), target.String()) - } else { - str = fmt.Sprintf("b %s", target.String()) - } - case condBr: - size := is64SizeBitToSize(i.u2 >> 32) - c := cond(i.u1) - target := label(i.u2 & 0xffffffff) - switch c.kind() { - case condKindRegisterZero: - if !i.condBrOffsetResolved() { - str = fmt.Sprintf("cbz %s, (%s)", formatVRegSized(c.register(), size), target.String()) - } else { - str = fmt.Sprintf("cbz %s, #%#x %s", formatVRegSized(c.register(), size), i.condBrOffset(), target.String()) - } - case condKindRegisterNotZero: - if offset := i.condBrOffset(); offset != 0 { - str = fmt.Sprintf("cbnz %s, #%#x (%s)", formatVRegSized(c.register(), size), offset, target.String()) - } else { - str = fmt.Sprintf("cbnz %s, %s", formatVRegSized(c.register(), size), target.String()) - } - case condKindCondFlagSet: - if offset := i.condBrOffset(); offset != 0 { - if target == labelInvalid { - str = fmt.Sprintf("b.%s #%#x", c.flag(), offset) - } else { - str = fmt.Sprintf("b.%s #%#x, (%s)", c.flag(), offset, target.String()) - } - } else { - str = fmt.Sprintf("b.%s %s", c.flag(), target.String()) - } - } - case adr: - str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd, 64), int64(i.u1)) - case brTableSequence: - targetIndex := i.u1 - str = fmt.Sprintf("br_table_sequence %s, table_index=%d", formatVRegSized(i.rn.nr(), 64), targetIndex) - case exitSequence: - str = fmt.Sprintf("exit_sequence %s", formatVRegSized(i.rn.nr(), 64)) - case atomicRmw: - m := atomicRmwOp(i.u1).String() - size := byte(32) - switch i.u2 { - case 8: - size = 64 - case 2: - m = m + "h" - case 1: - m = m + "b" - } - str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), 64)) - case atomicCas: - m := "casal" - size := byte(32) - switch i.u2 { - case 8: - size = 64 - case 2: - m = m + "h" - case 1: - m = m + "b" - } - str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd, size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64)) - case atomicLoad: - m := "ldar" - size := byte(32) - switch i.u2 { - case 8: - size = 64 - case 2: - m = m + "h" - case 1: - m = m + "b" - } - str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), 64)) - case atomicStore: - m := "stlr" - size := byte(32) - switch i.u2 { - case 8: - size = 64 - case 2: - m = m + "h" - case 1: - m = m + "b" - } - str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64)) - case dmb: - str = "dmb" - case udf: - str = "udf" - case emitSourceOffsetInfo: - str = fmt.Sprintf("source_offset_info %d", ssa.SourceOffset(i.u1)) - case vecLoad1R: - str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64)) - case loadConstBlockArg: - str = fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd, 64), i.u1) - default: - panic(i.kind) - } - return -} - -func (i *instruction) asAdr(rd regalloc.VReg, offset int64) { - i.kind = adr - i.rd = rd - i.u1 = uint64(offset) -} - -func (i *instruction) asAtomicRmw(op 
atomicRmwOp, rn, rs, rt regalloc.VReg, size uint64) { - i.kind = atomicRmw - i.rd, i.rn, i.rm = rt, operandNR(rn), operandNR(rs) - i.u1 = uint64(op) - i.u2 = size -} - -func (i *instruction) asAtomicCas(rn, rs, rt regalloc.VReg, size uint64) { - i.kind = atomicCas - i.rm, i.rn, i.rd = operandNR(rt), operandNR(rn), rs - i.u2 = size -} - -func (i *instruction) asAtomicLoad(rn, rt regalloc.VReg, size uint64) { - i.kind = atomicLoad - i.rn, i.rd = operandNR(rn), rt - i.u2 = size -} - -func (i *instruction) asAtomicStore(rn, rt operand, size uint64) { - i.kind = atomicStore - i.rn, i.rm = rn, rt - i.u2 = size -} - -func (i *instruction) asDMB() { - i.kind = dmb -} - -// TODO: delete unnecessary things. -const ( - // nop0 represents a no-op of zero size. - nop0 instructionKind = iota + 1 - // aluRRR represents an ALU operation with two register sources and a register destination. - aluRRR - // aluRRRR represents an ALU operation with three register sources and a register destination. - aluRRRR - // aluRRImm12 represents an ALU operation with a register source and an immediate-12 source, with a register destination. - aluRRImm12 - // aluRRBitmaskImm represents an ALU operation with a register source and a bitmask immediate, with a register destination. - aluRRBitmaskImm - // aluRRImmShift represents an ALU operation with a register source and an immediate-shifted source, with a register destination. - aluRRImmShift - // aluRRRShift represents an ALU operation with two register sources, one of which can be shifted, with a register destination. - aluRRRShift - // aluRRRExtend represents an ALU operation with two register sources, one of which can be extended, with a register destination. - aluRRRExtend - // bitRR represents a bit op instruction with a single register source. - bitRR - // uLoad8 represents an unsigned 8-bit load. - uLoad8 - // sLoad8 represents a signed 8-bit load into 64-bit register. - sLoad8 - // uLoad16 represents an unsigned 16-bit load into 64-bit register. - uLoad16 - // sLoad16 represents a signed 16-bit load into 64-bit register. - sLoad16 - // uLoad32 represents an unsigned 32-bit load into 64-bit register. - uLoad32 - // sLoad32 represents a signed 32-bit load into 64-bit register. - sLoad32 - // uLoad64 represents a 64-bit load. - uLoad64 - // store8 represents an 8-bit store. - store8 - // store16 represents a 16-bit store. - store16 - // store32 represents a 32-bit store. - store32 - // store64 represents a 64-bit store. - store64 - // storeP64 represents a store of a pair of registers. - storeP64 - // loadP64 represents a load of a pair of registers. - loadP64 - // mov64 represents a MOV instruction. These are encoded as ORR's but we keep them separate for better handling. - mov64 - // mov32 represents a 32-bit MOV. This zeroes the top 32 bits of the destination. - mov32 - // movZ represents a MOVZ with a 16-bit immediate. - movZ - // movN represents a MOVN with a 16-bit immediate. - movN - // movK represents a MOVK with a 16-bit immediate. - movK - // extend represents a sign- or zero-extend operation. - extend - // cSel represents a conditional-select operation. - cSel - // cSet represents a conditional-set operation. - cSet - // cCmpImm represents a conditional comparison with an immediate. - cCmpImm - // fpuMov64 represents a FPU move. Distinct from a vector-register move; moving just 64 bits appears to be significantly faster. - fpuMov64 - // fpuMov128 represents a vector register move. 
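// In contrast to fpuMov64 above, this copies the full 128-bit vector register.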
- fpuMov128 - // fpuMovFromVec represents a move to scalar from a vector element. - fpuMovFromVec - // fpuRR represents a 1-op FPU instruction. - fpuRR - // fpuRRR represents a 2-op FPU instruction. - fpuRRR - // fpuRRI represents a 2-op FPU instruction with immediate value. - fpuRRI - // fpuRRRR represents a 3-op FPU instruction. - fpuRRRR - // fpuCmp represents an FPU comparison, either 32 or 64 bit. - fpuCmp - // fpuLoad32 represents a floating-point load, single-precision (32 bit). - fpuLoad32 - // fpuStore32 represents a floating-point store, single-precision (32 bit). - fpuStore32 - // fpuLoad64 represents a floating-point load, double-precision (64 bit). - fpuLoad64 - // fpuStore64 represents a floating-point store, double-precision (64 bit). - fpuStore64 - // fpuLoad128 represents a floating-point/vector load, 128 bit. - fpuLoad128 - // fpuStore128 represents a floating-point/vector store, 128 bit. - fpuStore128 - // loadFpuConst32 represents a load of a 32-bit floating-point constant. - loadFpuConst32 - // loadFpuConst64 represents a load of a 64-bit floating-point constant. - loadFpuConst64 - // loadFpuConst128 represents a load of a 128-bit floating-point constant. - loadFpuConst128 - // vecLoad1R represents a load of one single-element structure that is replicated to all lanes of a vector. - vecLoad1R - // fpuToInt represents a conversion from FP to integer. - fpuToInt - // intToFpu represents a conversion from integer to FP. - intToFpu - // fpuCSel represents a 32/64-bit FP conditional select. - fpuCSel - // movToVec represents a move to a vector element from a GPR. - movToVec - // movFromVec represents an unsigned move from a vector element to a GPR. - movFromVec - // movFromVecSigned represents a signed move from a vector element to a GPR. - movFromVecSigned - // vecDup represents a duplication of general-purpose register to vector. - vecDup - // vecDupElement represents a duplication of a vector element to vector or scalar. - vecDupElement - // vecDupFromFpu represents a duplication of scalar to vector. - vecDupFromFpu - // vecExtract represents a vector extraction operation. - vecExtract - // vecExtend represents a vector extension operation. - vecExtend - // vecMovElement represents a move vector element to another vector element operation. - vecMovElement - // vecMiscNarrow represents a vector narrowing operation. - vecMiscNarrow - // vecRRR represents a vector ALU operation. - vecRRR - // vecRRRRewrite is exactly the same as vecRRR except that this rewrites the destination register. - // For example, BSL instruction rewrites the destination register, and the existing value influences the result. - // Therefore, the "destination" register in vecRRRRewrite will be treated as "use" which makes the register outlive - // the instruction while this instruction doesn't have "def" in the context of register allocation. - vecRRRRewrite - // vecMisc represents a vector two register miscellaneous instruction. - vecMisc - // vecLanes represents a vector instruction across lanes. - vecLanes - // vecShiftImm represents a SIMD scalar shift by immediate instruction. - vecShiftImm - // vecTbl represents a table vector lookup - single register table. - vecTbl - // vecTbl2 represents a table vector lookup - two register table. - vecTbl2 - // vecPermute represents a vector permute instruction. - vecPermute - // movToFPSR represents a move to the FPSR. - movToFPSR - // movFromFPSR represents a move from the FPSR. - movFromFPSR - // call represents a machine call instruction.
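// This is a direct call (BL) to a function reference known at compile time, in contrast to callInd below.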
- call - // callInd represents a machine indirect-call instruction. - callInd - // ret represents a machine return instruction. - ret - // br represents an unconditional branch. - br - // condBr represents a conditional branch. - condBr - // adr represents a compute the address (using a PC-relative offset) of a memory location. - adr - // brTableSequence represents a jump-table sequence. - brTableSequence - // exitSequence consists of multiple instructions, and exits the execution immediately. - // See encodeExitSequence. - exitSequence - // atomicRmw represents an atomic read-modify-write operation with two register sources and a register destination. - atomicRmw - // atomicCas represents an atomic compare-and-swap operation with three register sources. The value is loaded to - // the source register containing the comparison value. - atomicCas - // atomicLoad represents an atomic load with one source register and a register destination. - atomicLoad - // atomicStore represents an atomic store with two source registers and no destination. - atomicStore - // dmb represents the data memory barrier instruction in inner-shareable (ish) mode. - dmb - // UDF is the undefined instruction. For debugging only. - udf - // loadConstBlockArg represents a load of a constant block argument. - loadConstBlockArg - - // emitSourceOffsetInfo is a dummy instruction to emit source offset info. - // The existence of this instruction does not affect the execution. - emitSourceOffsetInfo - - // ------------------- do not define below this line ------------------- - numInstructionKinds -) - -func (i *instruction) asLoadConstBlockArg(v uint64, typ ssa.Type, dst regalloc.VReg) *instruction { - i.kind = loadConstBlockArg - i.u1 = v - i.u2 = uint64(typ) - i.rd = dst - return i -} - -func (i *instruction) loadConstBlockArgData() (v uint64, typ ssa.Type, dst regalloc.VReg) { - return i.u1, ssa.Type(i.u2), i.rd -} - -func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction { - i.kind = emitSourceOffsetInfo - i.u1 = uint64(l) - return i -} - -func (i *instruction) sourceOffsetInfo() ssa.SourceOffset { - return ssa.SourceOffset(i.u1) -} - -func (i *instruction) asUDF() *instruction { - i.kind = udf - return i -} - -func (i *instruction) asFpuToInt(rd regalloc.VReg, rn operand, rdSigned, src64bit, dst64bit bool) { - i.kind = fpuToInt - i.rn = rn - i.rd = rd - if rdSigned { - i.u1 = 1 - } - if src64bit { - i.u2 = 1 - } - if dst64bit { - i.u2 |= 2 - } -} - -func (i *instruction) asIntToFpu(rd regalloc.VReg, rn operand, rnSigned, src64bit, dst64bit bool) { - i.kind = intToFpu - i.rn = rn - i.rd = rd - if rnSigned { - i.u1 = 1 - } - if src64bit { - i.u2 = 1 - } - if dst64bit { - i.u2 |= 2 - } -} - -func (i *instruction) asExitSequence(ctx regalloc.VReg) *instruction { - i.kind = exitSequence - i.rn = operandNR(ctx) - return i -} - -// aluOp determines the type of ALU operation. Instructions whose kind is one of -// aluRRR, aluRRRR, aluRRImm12, aluRRBitmaskImm, aluRRImmShift, aluRRRShift and aluRRRExtend -// would use this type. 
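// The operation is stored in u1, and for most of these kinds bit 32 of u1 or u2 records whether the
// operation is 64-bit (see the as* constructors above and the String/encode switches).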
-type aluOp uint32 - -func (a aluOp) String() string { - switch a { - case aluOpAdd: - return "add" - case aluOpSub: - return "sub" - case aluOpOrr: - return "orr" - case aluOpOrn: - return "orn" - case aluOpAnd: - return "and" - case aluOpAnds: - return "ands" - case aluOpBic: - return "bic" - case aluOpEor: - return "eor" - case aluOpAddS: - return "adds" - case aluOpSubS: - return "subs" - case aluOpSMulH: - return "sMulH" - case aluOpUMulH: - return "uMulH" - case aluOpSDiv: - return "sdiv" - case aluOpUDiv: - return "udiv" - case aluOpRotR: - return "ror" - case aluOpLsr: - return "lsr" - case aluOpAsr: - return "asr" - case aluOpLsl: - return "lsl" - case aluOpMAdd: - return "madd" - case aluOpMSub: - return "msub" - } - panic(int(a)) -} - -const ( - // 32/64-bit Add. - aluOpAdd aluOp = iota - // 32/64-bit Subtract. - aluOpSub - // 32/64-bit Bitwise OR. - aluOpOrr - // 32/64-bit Bitwise OR NOT. - aluOpOrn - // 32/64-bit Bitwise AND. - aluOpAnd - // 32/64-bit Bitwise ANDS. - aluOpAnds - // 32/64-bit Bitwise AND NOT. - aluOpBic - // 32/64-bit Bitwise XOR (Exclusive OR). - aluOpEor - // 32/64-bit Add setting flags. - aluOpAddS - // 32/64-bit Subtract setting flags. - aluOpSubS - // Signed multiply, high-word result. - aluOpSMulH - // Unsigned multiply, high-word result. - aluOpUMulH - // 64-bit Signed divide. - aluOpSDiv - // 64-bit Unsigned divide. - aluOpUDiv - // 32/64-bit Rotate right. - aluOpRotR - // 32/64-bit Logical shift right. - aluOpLsr - // 32/64-bit Arithmetic shift right. - aluOpAsr - // 32/64-bit Logical shift left. - aluOpLsl /// Multiply-add - - // MAdd and MSub are only applicable for aluRRRR. - aluOpMAdd - aluOpMSub -) - -// vecOp determines the type of vector operation. Instructions whose kind is one of -// vecOpCnt would use this type. -type vecOp int - -// String implements fmt.Stringer. 
-func (b vecOp) String() string { - switch b { - case vecOpCnt: - return "cnt" - case vecOpCmeq: - return "cmeq" - case vecOpCmgt: - return "cmgt" - case vecOpCmhi: - return "cmhi" - case vecOpCmge: - return "cmge" - case vecOpCmhs: - return "cmhs" - case vecOpFcmeq: - return "fcmeq" - case vecOpFcmgt: - return "fcmgt" - case vecOpFcmge: - return "fcmge" - case vecOpCmeq0: - return "cmeq0" - case vecOpUaddlv: - return "uaddlv" - case vecOpBit: - return "bit" - case vecOpBic: - return "bic" - case vecOpBsl: - return "bsl" - case vecOpNot: - return "not" - case vecOpAnd: - return "and" - case vecOpOrr: - return "orr" - case vecOpEOR: - return "eor" - case vecOpFadd: - return "fadd" - case vecOpAdd: - return "add" - case vecOpAddp: - return "addp" - case vecOpAddv: - return "addv" - case vecOpSub: - return "sub" - case vecOpFsub: - return "fsub" - case vecOpSmin: - return "smin" - case vecOpUmin: - return "umin" - case vecOpUminv: - return "uminv" - case vecOpSmax: - return "smax" - case vecOpUmax: - return "umax" - case vecOpUmaxp: - return "umaxp" - case vecOpUrhadd: - return "urhadd" - case vecOpFmul: - return "fmul" - case vecOpSqrdmulh: - return "sqrdmulh" - case vecOpMul: - return "mul" - case vecOpUmlal: - return "umlal" - case vecOpFdiv: - return "fdiv" - case vecOpFsqrt: - return "fsqrt" - case vecOpAbs: - return "abs" - case vecOpFabs: - return "fabs" - case vecOpNeg: - return "neg" - case vecOpFneg: - return "fneg" - case vecOpFrintp: - return "frintp" - case vecOpFrintm: - return "frintm" - case vecOpFrintn: - return "frintn" - case vecOpFrintz: - return "frintz" - case vecOpFcvtl: - return "fcvtl" - case vecOpFcvtn: - return "fcvtn" - case vecOpFcvtzu: - return "fcvtzu" - case vecOpFcvtzs: - return "fcvtzs" - case vecOpScvtf: - return "scvtf" - case vecOpUcvtf: - return "ucvtf" - case vecOpSqxtn: - return "sqxtn" - case vecOpUqxtn: - return "uqxtn" - case vecOpSqxtun: - return "sqxtun" - case vecOpRev64: - return "rev64" - case vecOpXtn: - return "xtn" - case vecOpShll: - return "shll" - case vecOpSshl: - return "sshl" - case vecOpSshll: - return "sshll" - case vecOpUshl: - return "ushl" - case vecOpUshll: - return "ushll" - case vecOpSshr: - return "sshr" - case vecOpZip1: - return "zip1" - case vecOpFmin: - return "fmin" - case vecOpFmax: - return "fmax" - case vecOpSmull: - return "smull" - case vecOpSmull2: - return "smull2" - } - panic(int(b)) -} - -const ( - vecOpCnt vecOp = iota - vecOpCmeq0 - vecOpCmeq - vecOpCmgt - vecOpCmhi - vecOpCmge - vecOpCmhs - vecOpFcmeq - vecOpFcmgt - vecOpFcmge - vecOpUaddlv - vecOpBit - vecOpBic - vecOpBsl - vecOpNot - vecOpAnd - vecOpOrr - vecOpEOR - vecOpAdd - vecOpFadd - vecOpAddv - vecOpSqadd - vecOpUqadd - vecOpAddp - vecOpSub - vecOpFsub - vecOpSqsub - vecOpUqsub - vecOpSmin - vecOpUmin - vecOpUminv - vecOpFmin - vecOpSmax - vecOpUmax - vecOpUmaxp - vecOpFmax - vecOpUrhadd - vecOpMul - vecOpFmul - vecOpSqrdmulh - vecOpUmlal - vecOpFdiv - vecOpFsqrt - vecOpAbs - vecOpFabs - vecOpNeg - vecOpFneg - vecOpFrintm - vecOpFrintn - vecOpFrintp - vecOpFrintz - vecOpFcvtl - vecOpFcvtn - vecOpFcvtzs - vecOpFcvtzu - vecOpScvtf - vecOpUcvtf - vecOpSqxtn - vecOpSqxtun - vecOpUqxtn - vecOpRev64 - vecOpXtn - vecOpShll - vecOpSshl - vecOpSshll - vecOpUshl - vecOpUshll - vecOpSshr - vecOpZip1 - vecOpSmull - vecOpSmull2 -) - -// bitOp determines the type of bitwise operation. Instructions whose kind is one of -// bitOpRbit and bitOpClz would use this type. -type bitOp int - -// String implements fmt.Stringer. 
-func (b bitOp) String() string { - switch b { - case bitOpRbit: - return "rbit" - case bitOpClz: - return "clz" - } - panic(int(b)) -} - -const ( - // 32/64-bit Rbit. - bitOpRbit bitOp = iota - // 32/64-bit Clz. - bitOpClz -) - -// fpuUniOp represents a unary floating-point unit (FPU) operation. -type fpuUniOp byte - -const ( - fpuUniOpNeg fpuUniOp = iota - fpuUniOpCvt32To64 - fpuUniOpCvt64To32 - fpuUniOpSqrt - fpuUniOpRoundPlus - fpuUniOpRoundMinus - fpuUniOpRoundZero - fpuUniOpRoundNearest - fpuUniOpAbs -) - -// String implements the fmt.Stringer. -func (f fpuUniOp) String() string { - switch f { - case fpuUniOpNeg: - return "fneg" - case fpuUniOpCvt32To64: - return "fcvt" - case fpuUniOpCvt64To32: - return "fcvt" - case fpuUniOpSqrt: - return "fsqrt" - case fpuUniOpRoundPlus: - return "frintp" - case fpuUniOpRoundMinus: - return "frintm" - case fpuUniOpRoundZero: - return "frintz" - case fpuUniOpRoundNearest: - return "frintn" - case fpuUniOpAbs: - return "fabs" - } - panic(int(f)) -} - -// fpuBinOp represents a binary floating-point unit (FPU) operation. -type fpuBinOp byte - -const ( - fpuBinOpAdd = iota - fpuBinOpSub - fpuBinOpMul - fpuBinOpDiv - fpuBinOpMax - fpuBinOpMin -) - -// String implements the fmt.Stringer. -func (f fpuBinOp) String() string { - switch f { - case fpuBinOpAdd: - return "fadd" - case fpuBinOpSub: - return "fsub" - case fpuBinOpMul: - return "fmul" - case fpuBinOpDiv: - return "fdiv" - case fpuBinOpMax: - return "fmax" - case fpuBinOpMin: - return "fmin" - } - panic(int(f)) -} - -// extMode represents the mode of a register operand extension. -// For example, aluRRRExtend instructions need this info to determine the extensions. -type extMode byte - -const ( - extModeNone extMode = iota - // extModeZeroExtend64 suggests a zero-extension to 32 bits if the original bit size is less than 32. - extModeZeroExtend32 - // extModeSignExtend64 stands for a sign-extension to 32 bits if the original bit size is less than 32. - extModeSignExtend32 - // extModeZeroExtend64 suggests a zero-extension to 64 bits if the original bit size is less than 64. - extModeZeroExtend64 - // extModeSignExtend64 stands for a sign-extension to 64 bits if the original bit size is less than 64. - extModeSignExtend64 -) - -func (e extMode) bits() byte { - switch e { - case extModeZeroExtend32, extModeSignExtend32: - return 32 - case extModeZeroExtend64, extModeSignExtend64: - return 64 - default: - return 0 - } -} - -func (e extMode) signed() bool { - switch e { - case extModeSignExtend32, extModeSignExtend64: - return true - default: - return false - } -} - -func extModeOf(t ssa.Type, signed bool) extMode { - switch t.Bits() { - case 32: - if signed { - return extModeSignExtend32 - } - return extModeZeroExtend32 - case 64: - if signed { - return extModeSignExtend64 - } - return extModeZeroExtend64 - default: - panic("TODO? do we need narrower than 32 bits?") - } -} - -type extendOp byte - -const ( - extendOpUXTB extendOp = 0b000 - extendOpUXTH extendOp = 0b001 - extendOpUXTW extendOp = 0b010 - // extendOpUXTX does nothing, but convenient symbol that officially exists. See: - // https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct - extendOpUXTX extendOp = 0b011 - extendOpSXTB extendOp = 0b100 - extendOpSXTH extendOp = 0b101 - extendOpSXTW extendOp = 0b110 - // extendOpSXTX does nothing, but convenient symbol that officially exists. 
See: - // https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct - extendOpSXTX extendOp = 0b111 - extendOpNone extendOp = 0xff -) - -func (e extendOp) srcBits() byte { - switch e { - case extendOpUXTB, extendOpSXTB: - return 8 - case extendOpUXTH, extendOpSXTH: - return 16 - case extendOpUXTW, extendOpSXTW: - return 32 - case extendOpUXTX, extendOpSXTX: - return 64 - } - panic(int(e)) -} - -func (e extendOp) String() string { - switch e { - case extendOpUXTB: - return "UXTB" - case extendOpUXTH: - return "UXTH" - case extendOpUXTW: - return "UXTW" - case extendOpUXTX: - return "UXTX" - case extendOpSXTB: - return "SXTB" - case extendOpSXTH: - return "SXTH" - case extendOpSXTW: - return "SXTW" - case extendOpSXTX: - return "SXTX" - } - panic(int(e)) -} - -func extendOpFrom(signed bool, from byte) extendOp { - switch from { - case 8: - if signed { - return extendOpSXTB - } - return extendOpUXTB - case 16: - if signed { - return extendOpSXTH - } - return extendOpUXTH - case 32: - if signed { - return extendOpSXTW - } - return extendOpUXTW - case 64: - if signed { - return extendOpSXTX - } - return extendOpUXTX - } - panic("invalid extendOpFrom") -} - -type shiftOp byte - -const ( - shiftOpLSL shiftOp = 0b00 - shiftOpLSR shiftOp = 0b01 - shiftOpASR shiftOp = 0b10 - shiftOpROR shiftOp = 0b11 -) - -func (s shiftOp) String() string { - switch s { - case shiftOpLSL: - return "lsl" - case shiftOpLSR: - return "lsr" - case shiftOpASR: - return "asr" - case shiftOpROR: - return "ror" - } - panic(int(s)) -} - -const exitSequenceSize = 6 * 4 // 6 instructions as in encodeExitSequence. - -// size returns the size of the instruction in encoded bytes. -func (i *instruction) size() int64 { - switch i.kind { - case exitSequence: - return exitSequenceSize // 5 instructions as in encodeExitSequence. - case nop0, loadConstBlockArg: - return 0 - case emitSourceOffsetInfo: - return 0 - case loadFpuConst32: - if i.u1 == 0 { - return 4 // zero loading can be encoded as a single instruction. - } - return 4 + 4 + 4 - case loadFpuConst64: - if i.u1 == 0 { - return 4 // zero loading can be encoded as a single instruction. - } - return 4 + 4 + 8 - case loadFpuConst128: - if i.u1 == 0 && i.u2 == 0 { - return 4 // zero loading can be encoded as a single instruction. - } - return 4 + 4 + 16 - case brTableSequence: - return 4*4 + int64(i.u2)*4 - default: - return 4 - } -} - -// vecArrangement is the arrangement of data within a vector register. -type vecArrangement byte - -const ( - // vecArrangementNone is an arrangement indicating no data is stored. - vecArrangementNone vecArrangement = iota - // vecArrangement8B is an arrangement of 8 bytes (64-bit vector) - vecArrangement8B - // vecArrangement16B is an arrangement of 16 bytes (128-bit vector) - vecArrangement16B - // vecArrangement4H is an arrangement of 4 half precisions (64-bit vector) - vecArrangement4H - // vecArrangement8H is an arrangement of 8 half precisions (128-bit vector) - vecArrangement8H - // vecArrangement2S is an arrangement of 2 single precisions (64-bit vector) - vecArrangement2S - // vecArrangement4S is an arrangement of 4 single precisions (128-bit vector) - vecArrangement4S - // vecArrangement1D is an arrangement of 1 double precision (64-bit vector) - vecArrangement1D - // vecArrangement2D is an arrangement of 2 double precisions (128-bit vector) - vecArrangement2D - - // Assign each vector size specifier to a vector arrangement ID. 
- // Instructions can only have an arrangement or a size specifier, but not both, so it - // simplifies the internal representation of vector instructions by being able to - // store either into the same field. - - // vecArrangementB is a size specifier of byte - vecArrangementB - // vecArrangementH is a size specifier of word (16-bit) - vecArrangementH - // vecArrangementS is a size specifier of double word (32-bit) - vecArrangementS - // vecArrangementD is a size specifier of quad word (64-bit) - vecArrangementD - // vecArrangementQ is a size specifier of the entire vector (128-bit) - vecArrangementQ -) - -// String implements fmt.Stringer -func (v vecArrangement) String() (ret string) { - switch v { - case vecArrangement8B: - ret = "8B" - case vecArrangement16B: - ret = "16B" - case vecArrangement4H: - ret = "4H" - case vecArrangement8H: - ret = "8H" - case vecArrangement2S: - ret = "2S" - case vecArrangement4S: - ret = "4S" - case vecArrangement1D: - ret = "1D" - case vecArrangement2D: - ret = "2D" - case vecArrangementB: - ret = "B" - case vecArrangementH: - ret = "H" - case vecArrangementS: - ret = "S" - case vecArrangementD: - ret = "D" - case vecArrangementQ: - ret = "Q" - case vecArrangementNone: - ret = "none" - default: - panic(v) - } - return -} - -// vecIndex is the index of an element of a vector register -type vecIndex byte - -// vecIndexNone indicates no vector index specified. -const vecIndexNone = ^vecIndex(0) - -func ssaLaneToArrangement(lane ssa.VecLane) vecArrangement { - switch lane { - case ssa.VecLaneI8x16: - return vecArrangement16B - case ssa.VecLaneI16x8: - return vecArrangement8H - case ssa.VecLaneI32x4: - return vecArrangement4S - case ssa.VecLaneI64x2: - return vecArrangement2D - case ssa.VecLaneF32x4: - return vecArrangement4S - case ssa.VecLaneF64x2: - return vecArrangement2D - default: - panic(lane) - } -} - -// atomicRmwOp is the type of atomic read-modify-write operation. -type atomicRmwOp byte - -const ( - // atomicRmwOpAdd is an atomic add operation. - atomicRmwOpAdd atomicRmwOp = iota - // atomicRmwOpClr is an atomic clear operation, i.e. AND NOT. - atomicRmwOpClr - // atomicRmwOpSet is an atomic set operation, i.e. OR. - atomicRmwOpSet - // atomicRmwOpEor is an atomic exclusive OR operation. - atomicRmwOpEor - // atomicRmwOpSwp is an atomic swap operation. - atomicRmwOpSwp -) - -// String implements fmt.Stringer -func (a atomicRmwOp) String() string { - switch a { - case atomicRmwOpAdd: - return "ldaddal" - case atomicRmwOpClr: - return "ldclral" - case atomicRmwOpSet: - return "ldsetal" - case atomicRmwOpEor: - return "ldeoral" - case atomicRmwOpSwp: - return "swpal" - } - panic(fmt.Sprintf("unknown atomicRmwOp: %d", a)) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go deleted file mode 100644 index 21be9b71e..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go +++ /dev/null @@ -1,2351 +0,0 @@ -package arm64 - -import ( - "context" - "fmt" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -// Encode implements backend.Machine Encode. 
-func (m *machine) Encode(ctx context.Context) error { - m.resolveRelativeAddresses(ctx) - m.encode(m.rootInstr) - if l := len(m.compiler.Buf()); l > maxFunctionExecutableSize { - return fmt.Errorf("function size exceeds the limit: %d > %d", l, maxFunctionExecutableSize) - } - return nil -} - -func (m *machine) encode(root *instruction) { - for cur := root; cur != nil; cur = cur.next { - cur.encode(m) - } -} - -func (i *instruction) encode(m *machine) { - c := m.compiler - switch kind := i.kind; kind { - case nop0, emitSourceOffsetInfo, loadConstBlockArg: - case exitSequence: - encodeExitSequence(c, i.rn.reg()) - case ret: - // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/RET--Return-from-subroutine-?lang=en - c.Emit4Bytes(encodeRet()) - case br: - imm := i.brOffset() - c.Emit4Bytes(encodeUnconditionalBranch(false, imm)) - case call: - // We still don't know the exact address of the function to call, so we emit a placeholder. - c.AddRelocationInfo(i.callFuncRef()) - c.Emit4Bytes(encodeUnconditionalBranch(true, 0)) // 0 = placeholder - case callInd: - c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true)) - case store8, store16, store32, store64, fpuStore32, fpuStore64, fpuStore128: - c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], *i.getAmode())) - case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128: - c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.RealReg()], *i.getAmode())) - case vecLoad1R: - c.Emit4Bytes(encodeVecLoad1R( - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - vecArrangement(i.u1))) - case condBr: - imm19 := i.condBrOffset() - if imm19%4 != 0 { - panic("imm26 for branch must be a multiple of 4") - } - - imm19U32 := uint32(imm19/4) & 0b111_11111111_11111111 - brCond := i.condBrCond() - switch brCond.kind() { - case condKindRegisterZero: - rt := regNumberInEncoding[brCond.register().RealReg()] - c.Emit4Bytes(encodeCBZCBNZ(rt, false, imm19U32, i.condBr64bit())) - case condKindRegisterNotZero: - rt := regNumberInEncoding[brCond.register().RealReg()] - c.Emit4Bytes(encodeCBZCBNZ(rt, true, imm19U32, i.condBr64bit())) - case condKindCondFlagSet: - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B-cond--Branch-conditionally- - fl := brCond.flag() - c.Emit4Bytes(0b01010100<<24 | (imm19U32 << 5) | uint32(fl)) - default: - panic("BUG") - } - case movN: - c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32))) - case movZ: - c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32))) - case movK: - c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32))) - case mov32: - to, from := i.rd.RealReg(), i.rn.realReg() - c.Emit4Bytes(encodeAsMov32(regNumberInEncoding[from], regNumberInEncoding[to])) - case mov64: - to, from := i.rd.RealReg(), i.rn.realReg() - toIsSp := to == sp - fromIsSp := from == sp - c.Emit4Bytes(encodeMov64(regNumberInEncoding[to], regNumberInEncoding[from], toIsSp, fromIsSp)) - case loadP64, storeP64: - rt, rt2 := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()] - amode := i.getAmode() - rn := regNumberInEncoding[amode.rn.RealReg()] - var pre bool - switch amode.kind { - case addressModeKindPostIndex: - case addressModeKindPreIndex: - pre = true - default: - 
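// Only post-index and pre-index addressing modes are expected for a load/store pair here; any other
// address mode indicates a bug in instruction selection, hence the panic below.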
panic("BUG") - } - c.Emit4Bytes(encodePreOrPostIndexLoadStorePair64(pre, kind == loadP64, rn, rt, rt2, amode.imm)) - case loadFpuConst32: - rd := regNumberInEncoding[i.rd.RealReg()] - if i.u1 == 0 { - c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B)) - } else { - encodeLoadFpuConst32(c, rd, i.u1) - } - case loadFpuConst64: - rd := regNumberInEncoding[i.rd.RealReg()] - if i.u1 == 0 { - c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B)) - } else { - encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.RealReg()], i.u1) - } - case loadFpuConst128: - rd := regNumberInEncoding[i.rd.RealReg()] - lo, hi := i.u1, i.u2 - if lo == 0 && hi == 0 { - c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement16B)) - } else { - encodeLoadFpuConst128(c, rd, lo, hi) - } - case aluRRRR: - c.Emit4Bytes(encodeAluRRRR( - aluOp(i.u1), - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - regNumberInEncoding[i.rm.realReg()], - regNumberInEncoding[regalloc.VReg(i.u2).RealReg()], - uint32(i.u1>>32), - )) - case aluRRImmShift: - c.Emit4Bytes(encodeAluRRImm( - aluOp(i.u1), - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - uint32(i.rm.shiftImm()), - uint32(i.u2>>32), - )) - case aluRRR: - rn := i.rn.realReg() - c.Emit4Bytes(encodeAluRRR( - aluOp(i.u1), - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[rn], - regNumberInEncoding[i.rm.realReg()], - i.u2>>32 == 1, - rn == sp, - )) - case aluRRRExtend: - rm, exo, to := i.rm.er() - c.Emit4Bytes(encodeAluRRRExtend( - aluOp(i.u1), - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - regNumberInEncoding[rm.RealReg()], - exo, - to, - )) - case aluRRRShift: - r, amt, sop := i.rm.sr() - c.Emit4Bytes(encodeAluRRRShift( - aluOp(i.u1), - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - regNumberInEncoding[r.RealReg()], - uint32(amt), - sop, - i.u2>>32 == 1, - )) - case aluRRBitmaskImm: - c.Emit4Bytes(encodeAluBitmaskImmediate( - aluOp(i.u1), - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - i.u2, - i.u1>>32 == 1, - )) - case bitRR: - c.Emit4Bytes(encodeBitRR( - bitOp(i.u1), - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - uint32(i.u2)), - ) - case aluRRImm12: - imm12, shift := i.rm.imm12() - c.Emit4Bytes(encodeAluRRImm12( - aluOp(i.u1), - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - imm12, shift, - i.u2>>32 == 1, - )) - case fpuRRR: - c.Emit4Bytes(encodeFpuRRR( - fpuBinOp(i.u1), - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - regNumberInEncoding[i.rm.realReg()], - i.u2 == 1, - )) - case fpuMov64, fpuMov128: - // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/MOV--vector---Move-vector--an-alias-of-ORR--vector--register-- - rd := regNumberInEncoding[i.rd.RealReg()] - rn := regNumberInEncoding[i.rn.realReg()] - var q uint32 - if kind == fpuMov128 { - q = 0b1 - } - c.Emit4Bytes(q<<30 | 0b1110101<<21 | rn<<16 | 0b000111<<10 | rn<<5 | rd) - case cSet: - rd := regNumberInEncoding[i.rd.RealReg()] - cf := condFlag(i.u1) - if i.u2 == 1 { - // https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/CSETM--Conditional-Set-Mask--an-alias-of-CSINV- - // Note that we set 64bit version here. 
- c.Emit4Bytes(0b1101101010011111<<16 | uint32(cf.invert())<<12 | 0b011111<<5 | rd) - } else { - // https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/CSET--Conditional-Set--an-alias-of-CSINC- - // Note that we set 64bit version here. - c.Emit4Bytes(0b1001101010011111<<16 | uint32(cf.invert())<<12 | 0b111111<<5 | rd) - } - case extend: - c.Emit4Bytes(encodeExtend((i.u2>>32) == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()])) - case fpuCmp: - // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/FCMP--Floating-point-quiet-Compare--scalar--?lang=en - rn, rm := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()] - var ftype uint32 - if i.u1 == 1 { - ftype = 0b01 // double precision. - } - c.Emit4Bytes(0b1111<<25 | ftype<<22 | 1<<21 | rm<<16 | 0b1<<13 | rn<<5) - case udf: - // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UDF--Permanently-Undefined-?lang=en - if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable { - c.Emit4Bytes(dummyInstruction) - } else { - c.Emit4Bytes(0) - } - case adr: - c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.RealReg()], uint32(i.u1))) - case cSel: - c.Emit4Bytes(encodeConditionalSelect( - kind, - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - regNumberInEncoding[i.rm.realReg()], - condFlag(i.u1), - i.u2 == 1, - )) - case fpuCSel: - c.Emit4Bytes(encodeFpuCSel( - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - regNumberInEncoding[i.rm.realReg()], - condFlag(i.u1), - i.u2 == 1, - )) - case movToVec: - c.Emit4Bytes(encodeMoveToVec( - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - vecArrangement(byte(i.u1)), - vecIndex(i.u2), - )) - case movFromVec, movFromVecSigned: - c.Emit4Bytes(encodeMoveFromVec( - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - vecArrangement(byte(i.u1)), - vecIndex(i.u2), - i.kind == movFromVecSigned, - )) - case vecDup: - c.Emit4Bytes(encodeVecDup( - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - vecArrangement(byte(i.u1)))) - case vecDupElement: - c.Emit4Bytes(encodeVecDupElement( - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - vecArrangement(byte(i.u1)), - vecIndex(i.u2))) - case vecExtract: - c.Emit4Bytes(encodeVecExtract( - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - regNumberInEncoding[i.rm.realReg()], - vecArrangement(byte(i.u1)), - uint32(i.u2))) - case vecPermute: - c.Emit4Bytes(encodeVecPermute( - vecOp(i.u1), - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - regNumberInEncoding[i.rm.realReg()], - vecArrangement(byte(i.u2)))) - case vecMovElement: - c.Emit4Bytes(encodeVecMovElement( - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - vecArrangement(i.u1), - uint32(i.u2), uint32(i.u2>>32), - )) - case vecMisc: - c.Emit4Bytes(encodeAdvancedSIMDTwoMisc( - vecOp(i.u1), - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - vecArrangement(i.u2), - )) - case vecLanes: - c.Emit4Bytes(encodeVecLanes( - vecOp(i.u1), - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - vecArrangement(i.u2), - )) - case vecShiftImm: - c.Emit4Bytes(encodeVecShiftImm( - vecOp(i.u1), - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - uint32(i.rm.shiftImm()), - 
vecArrangement(i.u2), - )) - case vecTbl: - c.Emit4Bytes(encodeVecTbl( - 1, - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - regNumberInEncoding[i.rm.realReg()], - vecArrangement(i.u2)), - ) - case vecTbl2: - c.Emit4Bytes(encodeVecTbl( - 2, - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - regNumberInEncoding[i.rm.realReg()], - vecArrangement(i.u2)), - ) - case brTableSequence: - targets := m.jmpTableTargets[i.u1] - encodeBrTableSequence(c, i.rn.reg(), targets) - case fpuToInt, intToFpu: - c.Emit4Bytes(encodeCnvBetweenFloatInt(i)) - case fpuRR: - c.Emit4Bytes(encodeFloatDataOneSource( - fpuUniOp(i.u1), - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - i.u2 == 1, - )) - case vecRRR: - if op := vecOp(i.u1); op == vecOpBsl || op == vecOpBit || op == vecOpUmlal { - panic(fmt.Sprintf("vecOp %s must use vecRRRRewrite instead of vecRRR", op.String())) - } - fallthrough - case vecRRRRewrite: - c.Emit4Bytes(encodeVecRRR( - vecOp(i.u1), - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - regNumberInEncoding[i.rm.realReg()], - vecArrangement(i.u2), - )) - case cCmpImm: - // Conditional compare (immediate) in https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en - sf := uint32((i.u2 >> 32) & 0b1) - nzcv := uint32(i.u2 & 0b1111) - cond := uint32(condFlag(i.u1)) - imm := uint32(i.rm.data & 0b11111) - rn := regNumberInEncoding[i.rn.realReg()] - c.Emit4Bytes( - sf<<31 | 0b111101001<<22 | imm<<16 | cond<<12 | 0b1<<11 | rn<<5 | nzcv, - ) - case movFromFPSR: - rt := regNumberInEncoding[i.rd.RealReg()] - c.Emit4Bytes(encodeSystemRegisterMove(rt, true)) - case movToFPSR: - rt := regNumberInEncoding[i.rn.realReg()] - c.Emit4Bytes(encodeSystemRegisterMove(rt, false)) - case atomicRmw: - c.Emit4Bytes(encodeAtomicRmw( - atomicRmwOp(i.u1), - regNumberInEncoding[i.rm.realReg()], - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rn.realReg()], - uint32(i.u2), - )) - case atomicCas: - c.Emit4Bytes(encodeAtomicCas( - regNumberInEncoding[i.rd.RealReg()], - regNumberInEncoding[i.rm.realReg()], - regNumberInEncoding[i.rn.realReg()], - uint32(i.u2), - )) - case atomicLoad: - c.Emit4Bytes(encodeAtomicLoadStore( - regNumberInEncoding[i.rn.realReg()], - regNumberInEncoding[i.rd.RealReg()], - uint32(i.u2), - 1, - )) - case atomicStore: - c.Emit4Bytes(encodeAtomicLoadStore( - regNumberInEncoding[i.rn.realReg()], - regNumberInEncoding[i.rm.realReg()], - uint32(i.u2), - 0, - )) - case dmb: - c.Emit4Bytes(encodeDMB()) - default: - panic(i.String()) - } -} - -func encodeMov64(rd, rn uint32, toIsSp, fromIsSp bool) uint32 { - if toIsSp || fromIsSp { - // This is an alias of ADD (immediate): - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--to-from-SP---Move-between-register-and-stack-pointer--an-alias-of-ADD--immediate-- - return encodeAddSubtractImmediate(0b100, 0, 0, rn, rd) - } else { - // This is an alias of ORR (shifted register): - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register-- - return encodeLogicalShiftedRegister(0b101, 0, rn, 0, regNumberInEncoding[xzr], rd) - } -} - -// encodeSystemRegisterMove encodes as "System register move" in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en -// -// Note that currently we only 
supports read/write of FPSR. -func encodeSystemRegisterMove(rt uint32, fromSystem bool) uint32 { - ret := 0b11010101<<24 | 0b11011<<16 | 0b01000100<<8 | 0b001<<5 | rt - if fromSystem { - ret |= 0b1 << 21 - } - return ret -} - -// encodeVecRRR encodes as either "Advanced SIMD three *" in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en -func encodeVecRRR(op vecOp, rd, rn, rm uint32, arr vecArrangement) uint32 { - switch op { - case vecOpBit: - _, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b10 /* always has size 0b10 */, 0b1, q) - case vecOpBic: - if arr > vecArrangement16B { - panic("unsupported arrangement: " + arr.String()) - } - _, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b01 /* always has size 0b01 */, 0b0, q) - case vecOpBsl: - if arr > vecArrangement16B { - panic("unsupported arrangement: " + arr.String()) - } - _, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b01 /* always has size 0b01 */, 0b1, q) - case vecOpAnd: - if arr > vecArrangement16B { - panic("unsupported arrangement: " + arr.String()) - } - _, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b00 /* always has size 0b00 */, 0b0, q) - case vecOpOrr: - _, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b10 /* always has size 0b10 */, 0b0, q) - case vecOpEOR: - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, size, 0b1, q) - case vecOpCmeq: - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10001, size, 0b1, q) - case vecOpCmgt: - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00110, size, 0b0, q) - case vecOpCmhi: - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00110, size, 0b1, q) - case vecOpCmge: - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00111, size, 0b0, q) - case vecOpCmhs: - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00111, size, 0b1, q) - case vecOpFcmeq: - var size, q uint32 - switch arr { - case vecArrangement4S: - size, q = 0b00, 0b1 - case vecArrangement2S: - size, q = 0b00, 0b0 - case vecArrangement2D: - size, q = 0b01, 0b1 - default: - panic("unsupported arrangement: " + arr.String()) - } - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11100, size, 0b0, q) - case vecOpFcmgt: - if arr < vecArrangement2S || arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11100, size, 0b1, q) - case vecOpFcmge: - var size, q uint32 - switch arr { - case vecArrangement4S: - size, q = 0b00, 0b1 - case vecArrangement2S: - size, q = 0b00, 0b0 - case vecArrangement2D: - size, q = 0b01, 0b1 - default: - panic("unsupported arrangement: " + arr.String()) - } - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11100, size, 0b1, q) - case vecOpAdd: - if arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10000, size, 0b0, q) - case vecOpSqadd: - if arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - 
return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00001, size, 0b0, q) - case vecOpUqadd: - if arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00001, size, 0b1, q) - case vecOpAddp: - if arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10111, size, 0b0, q) - case vecOpSqsub: - if arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00101, size, 0b0, q) - case vecOpUqsub: - if arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00101, size, 0b1, q) - case vecOpSub: - if arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10000, size, 0b1, q) - case vecOpFmin: - if arr < vecArrangement2S || arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11110, size, 0b0, q) - case vecOpSmin: - if arr > vecArrangement4S { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01101, size, 0b0, q) - case vecOpUmin: - if arr > vecArrangement4S { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01101, size, 0b1, q) - case vecOpFmax: - var size, q uint32 - switch arr { - case vecArrangement4S: - size, q = 0b00, 0b1 - case vecArrangement2S: - size, q = 0b00, 0b0 - case vecArrangement2D: - size, q = 0b01, 0b1 - default: - panic("unsupported arrangement: " + arr.String()) - } - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11110, size, 0b0, q) - case vecOpFadd: - var size, q uint32 - switch arr { - case vecArrangement4S: - size, q = 0b00, 0b1 - case vecArrangement2S: - size, q = 0b00, 0b0 - case vecArrangement2D: - size, q = 0b01, 0b1 - default: - panic("unsupported arrangement: " + arr.String()) - } - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11010, size, 0b0, q) - case vecOpFsub: - if arr < vecArrangement2S || arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11010, size, 0b0, q) - case vecOpFmul: - var size, q uint32 - switch arr { - case vecArrangement4S: - size, q = 0b00, 0b1 - case vecArrangement2S: - size, q = 0b00, 0b0 - case vecArrangement2D: - size, q = 0b01, 0b1 - default: - panic("unsupported arrangement: " + arr.String()) - } - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11011, size, 0b1, q) - case vecOpSqrdmulh: - if arr < vecArrangement4H || arr > vecArrangement4S { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10110, size, 0b1, q) - case vecOpFdiv: - var size, q uint32 - switch arr { - case vecArrangement4S: - size, q = 0b00, 0b1 - case vecArrangement2S: - size, q = 0b00, 0b0 - case vecArrangement2D: - size, q = 0b01, 0b1 - default: - panic("unsupported arrangement: " + arr.String()) - } - return 
encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11111, size, 0b1, q) - case vecOpSmax: - if arr > vecArrangement4S { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01100, size, 0b0, q) - case vecOpUmax: - if arr > vecArrangement4S { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01100, size, 0b1, q) - case vecOpUmaxp: - if arr > vecArrangement4S { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10100, size, 0b1, q) - case vecOpUrhadd: - if arr > vecArrangement4S { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00010, size, 0b1, q) - case vecOpMul: - if arr > vecArrangement4S { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10011, size, 0b0, q) - case vecOpUmlal: - if arr > vecArrangement4S { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeDifferent(rd, rn, rm, 0b1000, size, 0b1, q) - case vecOpSshl: - if arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01000, size, 0b0, q) - case vecOpUshl: - if arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - size, q := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01000, size, 0b1, q) - - case vecOpSmull: - if arr > vecArrangement4S { - panic("unsupported arrangement: " + arr.String()) - } - size, _ := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeDifferent(rd, rn, rm, 0b1100, size, 0b0, 0b0) - - case vecOpSmull2: - if arr > vecArrangement4S { - panic("unsupported arrangement: " + arr.String()) - } - size, _ := arrToSizeQEncoded(arr) - return encodeAdvancedSIMDThreeDifferent(rd, rn, rm, 0b1100, size, 0b0, 0b1) - - default: - panic("TODO: " + op.String()) - } -} - -func arrToSizeQEncoded(arr vecArrangement) (size, q uint32) { - switch arr { - case vecArrangement16B: - q = 0b1 - fallthrough - case vecArrangement8B: - size = 0b00 - case vecArrangement8H: - q = 0b1 - fallthrough - case vecArrangement4H: - size = 0b01 - case vecArrangement4S: - q = 0b1 - fallthrough - case vecArrangement2S: - size = 0b10 - case vecArrangement2D: - q = 0b1 - fallthrough - case vecArrangement1D: - size = 0b11 - default: - panic("BUG") - } - return -} - -// encodeAdvancedSIMDThreeSame encodes as "Advanced SIMD three same" in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en -func encodeAdvancedSIMDThreeSame(rd, rn, rm, opcode, size, U, Q uint32) uint32 { - return Q<<30 | U<<29 | 0b111<<25 | size<<22 | 0b1<<21 | rm<<16 | opcode<<11 | 0b1<<10 | rn<<5 | rd -} - -// encodeAdvancedSIMDThreeDifferent encodes as "Advanced SIMD three different" in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en -func encodeAdvancedSIMDThreeDifferent(rd, rn, rm, opcode, size, U, Q uint32) uint32 { - return Q<<30 | U<<29 | 0b111<<25 | size<<22 | 0b1<<21 | rm<<16 | opcode<<12 | rn<<5 | rd -} 
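
Editorial note (not part of the deleted file): the two helpers above pack the "Advanced SIMD three same/different" fields into a single 32-bit word. As a minimal, hedged sketch of that packing, the standalone Go program below mirrors the same bit layout and prints the word for one sample operand set; the register numbers and the expected value are illustrative assumptions, not taken from the diff.

    // Sketch only: mirrors the field layout used by encodeAdvancedSIMDThreeSame.
    package main

    import "fmt"

    // advSIMDThreeSame packs Q | U | 0b111 | size | 1 | Rm | opcode | 1 | Rn | Rd,
    // matching the deleted encodeAdvancedSIMDThreeSame helper.
    func advSIMDThreeSame(rd, rn, rm, opcode, size, u, q uint32) uint32 {
        return q<<30 | u<<29 | 0b111<<25 | size<<22 | 0b1<<21 | rm<<16 | opcode<<11 | 0b1<<10 | rn<<5 | rd
    }

    func main() {
        // vecOpAdd with a 16B arrangement uses opcode=0b10000, size=0b00, U=0, Q=1
        // in the code above. For rd=v0, rn=v1, rm=v2 this should correspond to
        // `add v0.16b, v1.16b, v2.16b` (expected 0x4e228420 per the ARM ARM;
        // worth re-checking with a disassembler).
        word := advSIMDThreeSame(0, 1, 2, 0b10000, 0b00, 0, 1)
        fmt.Printf("0x%08x\n", word)
    }
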
- -// encodeFloatDataOneSource encodes as "Floating-point data-processing (1 source)" in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp -func encodeFloatDataOneSource(op fpuUniOp, rd, rn uint32, dst64bit bool) uint32 { - var opcode, ptype uint32 - switch op { - case fpuUniOpCvt32To64: - opcode = 0b000101 - case fpuUniOpCvt64To32: - opcode = 0b000100 - ptype = 0b01 - case fpuUniOpNeg: - opcode = 0b000010 - if dst64bit { - ptype = 0b01 - } - case fpuUniOpSqrt: - opcode = 0b000011 - if dst64bit { - ptype = 0b01 - } - case fpuUniOpRoundPlus: - opcode = 0b001001 - if dst64bit { - ptype = 0b01 - } - case fpuUniOpRoundMinus: - opcode = 0b001010 - if dst64bit { - ptype = 0b01 - } - case fpuUniOpRoundZero: - opcode = 0b001011 - if dst64bit { - ptype = 0b01 - } - case fpuUniOpRoundNearest: - opcode = 0b001000 - if dst64bit { - ptype = 0b01 - } - case fpuUniOpAbs: - opcode = 0b000001 - if dst64bit { - ptype = 0b01 - } - default: - panic("BUG") - } - return 0b1111<<25 | ptype<<22 | 0b1<<21 | opcode<<15 | 0b1<<14 | rn<<5 | rd -} - -// encodeCnvBetweenFloatInt encodes as "Conversion between floating-point and integer" in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en -func encodeCnvBetweenFloatInt(i *instruction) uint32 { - rd := regNumberInEncoding[i.rd.RealReg()] - rn := regNumberInEncoding[i.rn.realReg()] - - var opcode uint32 - var rmode uint32 - var ptype uint32 - var sf uint32 - switch i.kind { - case intToFpu: // Either UCVTF or SCVTF. - rmode = 0b00 - - signed := i.u1 == 1 - src64bit := i.u2&1 != 0 - dst64bit := i.u2&2 != 0 - if signed { - opcode = 0b010 - } else { - opcode = 0b011 - } - if src64bit { - sf = 0b1 - } - if dst64bit { - ptype = 0b01 - } else { - ptype = 0b00 - } - case fpuToInt: // Either FCVTZU or FCVTZS. - rmode = 0b11 - - signed := i.u1 == 1 - src64bit := i.u2&1 != 0 - dst64bit := i.u2&2 != 0 - - if signed { - opcode = 0b000 - } else { - opcode = 0b001 - } - if dst64bit { - sf = 0b1 - } - if src64bit { - ptype = 0b01 - } else { - ptype = 0b00 - } - } - return sf<<31 | 0b1111<<25 | ptype<<22 | 0b1<<21 | rmode<<19 | opcode<<16 | rn<<5 | rd -} - -// encodeAdr encodes a PC-relative ADR instruction. -// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/ADR--Form-PC-relative-address- -func encodeAdr(rd uint32, offset uint32) uint32 { - if offset >= 1<<20 { - panic("BUG: too large adr instruction") - } - return offset&0b11<<29 | 0b1<<28 | offset&0b1111111111_1111111100<<3 | rd -} - -// encodeFpuCSel encodes as "Floating-point conditional select" in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en -func encodeFpuCSel(rd, rn, rm uint32, c condFlag, _64bit bool) uint32 { - var ftype uint32 - if _64bit { - ftype = 0b01 // double precision. 
- } - return 0b1111<<25 | ftype<<22 | 0b1<<21 | rm<<16 | uint32(c)<<12 | 0b11<<10 | rn<<5 | rd -} - -// encodeMoveToVec encodes as "Move general-purpose register to a vector element" (represented as `ins`) in -// https://developer.arm.com/documentation/dui0801/g/A64-SIMD-Vector-Instructions/MOV--vector--from-general- -// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/MOV--from-general---Move-general-purpose-register-to-a-vector-element--an-alias-of-INS--general--?lang=en -func encodeMoveToVec(rd, rn uint32, arr vecArrangement, index vecIndex) uint32 { - var imm5 uint32 - switch arr { - case vecArrangementB: - imm5 |= 0b1 - imm5 |= uint32(index) << 1 - if index > 0b1111 { - panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 15", index)) - } - case vecArrangementH: - imm5 |= 0b10 - imm5 |= uint32(index) << 2 - if index > 0b111 { - panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 7", index)) - } - case vecArrangementS: - imm5 |= 0b100 - imm5 |= uint32(index) << 3 - if index > 0b11 { - panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 3", index)) - } - case vecArrangementD: - imm5 |= 0b1000 - imm5 |= uint32(index) << 4 - if index > 0b1 { - panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 1", index)) - } - default: - panic("Unsupported arrangement " + arr.String()) - } - - return 0b01001110000<<21 | imm5<<16 | 0b000111<<10 | rn<<5 | rd -} - -// encodeMoveToVec encodes as "Move vector element to another vector element, mov (element)" (represented as `ins`) in -// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/MOV--element---Move-vector-element-to-another-vector-element--an-alias-of-INS--element--?lang=en -// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/INS--element---Insert-vector-element-from-another-vector-element-?lang=en -func encodeVecMovElement(rd, rn uint32, arr vecArrangement, srcIndex, dstIndex uint32) uint32 { - var imm4, imm5 uint32 - switch arr { - case vecArrangementB: - imm5 |= 0b1 - imm5 |= srcIndex << 1 - imm4 = dstIndex - if srcIndex > 0b1111 { - panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 15", srcIndex)) - } - case vecArrangementH: - imm5 |= 0b10 - imm5 |= srcIndex << 2 - imm4 = dstIndex << 1 - if srcIndex > 0b111 { - panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 7", srcIndex)) - } - case vecArrangementS: - imm5 |= 0b100 - imm5 |= srcIndex << 3 - imm4 = dstIndex << 2 - if srcIndex > 0b11 { - panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 3", srcIndex)) - } - case vecArrangementD: - imm5 |= 0b1000 - imm5 |= srcIndex << 4 - imm4 = dstIndex << 3 - if srcIndex > 0b1 { - panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 1", srcIndex)) - } - default: - panic("Unsupported arrangement " + arr.String()) - } - - return 0b01101110000<<21 | imm5<<16 | imm4<<11 | 0b1<<10 | rn<<5 | rd -} - -// encodeUnconditionalBranchReg encodes as "Unconditional branch (register)" in: -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en -func encodeUnconditionalBranchReg(rn uint32, link bool) uint32 { - var opc uint32 - if link { - opc = 0b0001 - } - return 0b1101011<<25 | opc<<21 | 0b11111<<16 | rn<<5 -} - -// encodeMoveFromVec encodes as "Move vector element to a general-purpose register" -// (represented as `umov` when dest is 32-bit, `umov` 
otherwise) in -// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/UMOV--Unsigned-Move-vector-element-to-general-purpose-register-?lang=en -// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/MOV--to-general---Move-vector-element-to-general-purpose-register--an-alias-of-UMOV-?lang=en -func encodeMoveFromVec(rd, rn uint32, arr vecArrangement, index vecIndex, signed bool) uint32 { - var op, imm4, q, imm5 uint32 - switch { - case arr == vecArrangementB: - imm5 |= 0b1 - imm5 |= uint32(index) << 1 - if index > 0b1111 { - panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 15", index)) - } - case arr == vecArrangementH: - imm5 |= 0b10 - imm5 |= uint32(index) << 2 - if index > 0b111 { - panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 7", index)) - } - case arr == vecArrangementS && signed: - q = 0b1 - fallthrough - case arr == vecArrangementS: - imm5 |= 0b100 - imm5 |= uint32(index) << 3 - if index > 0b11 { - panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 3", index)) - } - case arr == vecArrangementD && !signed: - imm5 |= 0b1000 - imm5 |= uint32(index) << 4 - q = 0b1 - if index > 0b1 { - panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 1", index)) - } - default: - panic("Unsupported arrangement " + arr.String()) - } - if signed { - op, imm4 = 0, 0b0101 - } else { - op, imm4 = 0, 0b0111 - } - return op<<29 | 0b01110000<<21 | q<<30 | imm5<<16 | imm4<<11 | 1<<10 | rn<<5 | rd -} - -// encodeVecDup encodes as "Duplicate general-purpose register to vector" DUP (general) -// (represented as `dup`) -// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/DUP--general---Duplicate-general-purpose-register-to-vector-?lang=en -func encodeVecDup(rd, rn uint32, arr vecArrangement) uint32 { - var q, imm5 uint32 - switch arr { - case vecArrangement8B: - q, imm5 = 0b0, 0b1 - case vecArrangement16B: - q, imm5 = 0b1, 0b1 - case vecArrangement4H: - q, imm5 = 0b0, 0b10 - case vecArrangement8H: - q, imm5 = 0b1, 0b10 - case vecArrangement2S: - q, imm5 = 0b0, 0b100 - case vecArrangement4S: - q, imm5 = 0b1, 0b100 - case vecArrangement2D: - q, imm5 = 0b1, 0b1000 - default: - panic("Unsupported arrangement " + arr.String()) - } - return q<<30 | 0b001110000<<21 | imm5<<16 | 0b000011<<10 | rn<<5 | rd -} - -// encodeVecDup encodes as "Duplicate vector element to vector or scalar" DUP (element). -// (represented as `dup`) -// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/DUP--element---Duplicate-vector-element-to-vector-or-scalar- -func encodeVecDupElement(rd, rn uint32, arr vecArrangement, srcIndex vecIndex) uint32 { - var q, imm5 uint32 - q = 0b1 - switch arr { - case vecArrangementB: - imm5 |= 0b1 - imm5 |= uint32(srcIndex) << 1 - case vecArrangementH: - imm5 |= 0b10 - imm5 |= uint32(srcIndex) << 2 - case vecArrangementS: - imm5 |= 0b100 - imm5 |= uint32(srcIndex) << 3 - case vecArrangementD: - imm5 |= 0b1000 - imm5 |= uint32(srcIndex) << 4 - default: - panic("unsupported arrangement" + arr.String()) - } - - return q<<30 | 0b001110000<<21 | imm5<<16 | 0b1<<10 | rn<<5 | rd -} - -// encodeVecExtract encodes as "Advanced SIMD extract." -// Currently only `ext` is defined. 
-// https://developer.arm.com/documentation/ddi0602/2023-06/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp -// https://developer.arm.com/documentation/ddi0602/2023-06/SIMD-FP-Instructions/EXT--Extract-vector-from-pair-of-vectors-?lang=en -func encodeVecExtract(rd, rn, rm uint32, arr vecArrangement, index uint32) uint32 { - var q, imm4 uint32 - switch arr { - case vecArrangement8B: - q, imm4 = 0, 0b0111&uint32(index) - case vecArrangement16B: - q, imm4 = 1, 0b1111&uint32(index) - default: - panic("Unsupported arrangement " + arr.String()) - } - return q<<30 | 0b101110000<<21 | rm<<16 | imm4<<11 | rn<<5 | rd -} - -// encodeVecPermute encodes as "Advanced SIMD permute." -// https://developer.arm.com/documentation/ddi0602/2023-06/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp -func encodeVecPermute(op vecOp, rd, rn, rm uint32, arr vecArrangement) uint32 { - var q, size, opcode uint32 - switch op { - case vecOpZip1: - opcode = 0b011 - if arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - size, q = arrToSizeQEncoded(arr) - default: - panic("TODO: " + op.String()) - } - return q<<30 | 0b001110<<24 | size<<22 | rm<<16 | opcode<<12 | 0b10<<10 | rn<<5 | rd -} - -// encodeConditionalSelect encodes as "Conditional select" in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en#condsel -func encodeConditionalSelect(kind instructionKind, rd, rn, rm uint32, c condFlag, _64bit bool) uint32 { - if kind != cSel { - panic("TODO: support other conditional select") - } - - ret := 0b110101<<23 | rm<<16 | uint32(c)<<12 | rn<<5 | rd - if _64bit { - ret |= 0b1 << 31 - } - return ret -} - -const dummyInstruction uint32 = 0x14000000 // "b 0" - -// encodeLoadFpuConst32 encodes the following three instructions: -// -// ldr s8, #8 ;; literal load of data.f32 -// b 8 ;; skip the data -// data.f32 xxxxxxx -func encodeLoadFpuConst32(c backend.Compiler, rd uint32, rawF32 uint64) { - c.Emit4Bytes( - // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/LDR--literal--SIMD-FP---Load-SIMD-FP-Register--PC-relative-literal--?lang=en - 0b111<<26 | (0x8/4)<<5 | rd, - ) - c.Emit4Bytes(encodeUnconditionalBranch(false, 8)) // b 8 - if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable { - // Inlined data.f32 cannot be disassembled, so we add a dummy instruction here. - c.Emit4Bytes(dummyInstruction) - } else { - c.Emit4Bytes(uint32(rawF32)) // data.f32 xxxxxxx - } -} - -// encodeLoadFpuConst64 encodes the following three instructions: -// -// ldr d8, #8 ;; literal load of data.f64 -// b 12 ;; skip the data -// data.f64 xxxxxxx -func encodeLoadFpuConst64(c backend.Compiler, rd uint32, rawF64 uint64) { - c.Emit4Bytes( - // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/LDR--literal--SIMD-FP---Load-SIMD-FP-Register--PC-relative-literal--?lang=en - 0b1<<30 | 0b111<<26 | (0x8/4)<<5 | rd, - ) - c.Emit4Bytes(encodeUnconditionalBranch(false, 12)) // b 12 - if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable { - // Inlined data.f64 cannot be disassembled, so we add dummy instructions here. 
- c.Emit4Bytes(dummyInstruction) - c.Emit4Bytes(dummyInstruction) - } else { - // data.f64 xxxxxxx - c.Emit4Bytes(uint32(rawF64)) - c.Emit4Bytes(uint32(rawF64 >> 32)) - } -} - -// encodeLoadFpuConst128 encodes the following three instructions: -// -// ldr v8, #8 ;; literal load of data.f64 -// b 20 ;; skip the data -// data.v128 xxxxxxx -func encodeLoadFpuConst128(c backend.Compiler, rd uint32, lo, hi uint64) { - c.Emit4Bytes( - // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/LDR--literal--SIMD-FP---Load-SIMD-FP-Register--PC-relative-literal--?lang=en - 0b1<<31 | 0b111<<26 | (0x8/4)<<5 | rd, - ) - c.Emit4Bytes(encodeUnconditionalBranch(false, 20)) // b 20 - if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable { - // Inlined data.v128 cannot be disassembled, so we add dummy instructions here. - c.Emit4Bytes(dummyInstruction) - c.Emit4Bytes(dummyInstruction) - c.Emit4Bytes(dummyInstruction) - c.Emit4Bytes(dummyInstruction) - } else { - // data.v128 xxxxxxx - c.Emit4Bytes(uint32(lo)) - c.Emit4Bytes(uint32(lo >> 32)) - c.Emit4Bytes(uint32(hi)) - c.Emit4Bytes(uint32(hi >> 32)) - } -} - -// encodeAluRRRR encodes as Data-processing (3 source) in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en -func encodeAluRRRR(op aluOp, rd, rn, rm, ra, _64bit uint32) uint32 { - var oO, op31 uint32 - switch op { - case aluOpMAdd: - op31, oO = 0b000, 0b0 - case aluOpMSub: - op31, oO = 0b000, 0b1 - default: - panic("TODO/BUG") - } - return _64bit<<31 | 0b11011<<24 | op31<<21 | rm<<16 | oO<<15 | ra<<10 | rn<<5 | rd -} - -// encodeBitRR encodes as Data-processing (1 source) in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en -func encodeBitRR(op bitOp, rd, rn, _64bit uint32) uint32 { - var opcode2, opcode uint32 - switch op { - case bitOpRbit: - opcode2, opcode = 0b00000, 0b000000 - case bitOpClz: - opcode2, opcode = 0b00000, 0b000100 - default: - panic("TODO/BUG") - } - return _64bit<<31 | 0b1_0_11010110<<21 | opcode2<<15 | opcode<<10 | rn<<5 | rd -} - -func encodeAsMov32(rn, rd uint32) uint32 { - // This is an alias of ORR (shifted register): - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register-- - return encodeLogicalShiftedRegister(0b001, 0, rn, 0, regNumberInEncoding[xzr], rd) -} - -// encodeExtend encodes extension instructions. 
-func encodeExtend(signed bool, from, to byte, rd, rn uint32) uint32 { - // UTXB: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UXTB--Unsigned-Extend-Byte--an-alias-of-UBFM-?lang=en - // UTXH: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UXTH--Unsigned-Extend-Halfword--an-alias-of-UBFM-?lang=en - // STXB: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/SXTB--Signed-Extend-Byte--an-alias-of-SBFM- - // STXH: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/SXTH--Sign-Extend-Halfword--an-alias-of-SBFM- - // STXW: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/SXTW--Sign-Extend-Word--an-alias-of-SBFM- - var _31to10 uint32 - switch { - case !signed && from == 8 && to == 32: - // 32-bit UXTB - _31to10 = 0b0101001100000000000111 - case !signed && from == 16 && to == 32: - // 32-bit UXTH - _31to10 = 0b0101001100000000001111 - case !signed && from == 8 && to == 64: - // 64-bit UXTB - _31to10 = 0b0101001100000000000111 - case !signed && from == 16 && to == 64: - // 64-bit UXTH - _31to10 = 0b0101001100000000001111 - case !signed && from == 32 && to == 64: - return encodeAsMov32(rn, rd) - case signed && from == 8 && to == 32: - // 32-bit SXTB - _31to10 = 0b0001001100000000000111 - case signed && from == 16 && to == 32: - // 32-bit SXTH - _31to10 = 0b0001001100000000001111 - case signed && from == 8 && to == 64: - // 64-bit SXTB - _31to10 = 0b1001001101000000000111 - case signed && from == 16 && to == 64: - // 64-bit SXTH - _31to10 = 0b1001001101000000001111 - case signed && from == 32 && to == 64: - // SXTW - _31to10 = 0b1001001101000000011111 - default: - panic("BUG") - } - return _31to10<<10 | rn<<5 | rd -} - -func encodeLoadOrStore(kind instructionKind, rt uint32, amode addressMode) uint32 { - var _22to31 uint32 - var bits int64 - switch kind { - case uLoad8: - _22to31 = 0b0011100001 - bits = 8 - case sLoad8: - _22to31 = 0b0011100010 - bits = 8 - case uLoad16: - _22to31 = 0b0111100001 - bits = 16 - case sLoad16: - _22to31 = 0b0111100010 - bits = 16 - case uLoad32: - _22to31 = 0b1011100001 - bits = 32 - case sLoad32: - _22to31 = 0b1011100010 - bits = 32 - case uLoad64: - _22to31 = 0b1111100001 - bits = 64 - case fpuLoad32: - _22to31 = 0b1011110001 - bits = 32 - case fpuLoad64: - _22to31 = 0b1111110001 - bits = 64 - case fpuLoad128: - _22to31 = 0b0011110011 - bits = 128 - case store8: - _22to31 = 0b0011100000 - bits = 8 - case store16: - _22to31 = 0b0111100000 - bits = 16 - case store32: - _22to31 = 0b1011100000 - bits = 32 - case store64: - _22to31 = 0b1111100000 - bits = 64 - case fpuStore32: - _22to31 = 0b1011110000 - bits = 32 - case fpuStore64: - _22to31 = 0b1111110000 - bits = 64 - case fpuStore128: - _22to31 = 0b0011110010 - bits = 128 - default: - panic("BUG") - } - - switch amode.kind { - case addressModeKindRegScaledExtended: - return encodeLoadOrStoreExtended(_22to31, - regNumberInEncoding[amode.rn.RealReg()], - regNumberInEncoding[amode.rm.RealReg()], - rt, true, amode.extOp) - case addressModeKindRegScaled: - return encodeLoadOrStoreExtended(_22to31, - regNumberInEncoding[amode.rn.RealReg()], regNumberInEncoding[amode.rm.RealReg()], - rt, true, extendOpNone) - case addressModeKindRegExtended: - return encodeLoadOrStoreExtended(_22to31, - regNumberInEncoding[amode.rn.RealReg()], regNumberInEncoding[amode.rm.RealReg()], - rt, false, amode.extOp) - case addressModeKindRegReg: - return encodeLoadOrStoreExtended(_22to31, - 
regNumberInEncoding[amode.rn.RealReg()], regNumberInEncoding[amode.rm.RealReg()], - rt, false, extendOpNone) - case addressModeKindRegSignedImm9: - // e.g. https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled-- - return encodeLoadOrStoreSIMM9(_22to31, 0b00 /* unscaled */, regNumberInEncoding[amode.rn.RealReg()], rt, amode.imm) - case addressModeKindPostIndex: - return encodeLoadOrStoreSIMM9(_22to31, 0b01 /* post index */, regNumberInEncoding[amode.rn.RealReg()], rt, amode.imm) - case addressModeKindPreIndex: - return encodeLoadOrStoreSIMM9(_22to31, 0b11 /* pre index */, regNumberInEncoding[amode.rn.RealReg()], rt, amode.imm) - case addressModeKindRegUnsignedImm12: - // "unsigned immediate" in https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Loads-and-Stores?lang=en - rn := regNumberInEncoding[amode.rn.RealReg()] - imm := amode.imm - div := bits / 8 - if imm != 0 && !offsetFitsInAddressModeKindRegUnsignedImm12(byte(bits), imm) { - panic("BUG") - } - imm /= div - return _22to31<<22 | 0b1<<24 | uint32(imm&0b111111111111)<<10 | rn<<5 | rt - default: - panic("BUG") - } -} - -// encodeVecLoad1R encodes as Load one single-element structure and Replicate to all lanes (of one register) in -// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/LD1R--Load-one-single-element-structure-and-Replicate-to-all-lanes--of-one-register--?lang=en#sa_imm -func encodeVecLoad1R(rt, rn uint32, arr vecArrangement) uint32 { - size, q := arrToSizeQEncoded(arr) - return q<<30 | 0b001101010000001100<<12 | size<<10 | rn<<5 | rt -} - -// encodeAluBitmaskImmediate encodes as Logical (immediate) in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en -func encodeAluBitmaskImmediate(op aluOp, rd, rn uint32, imm uint64, _64bit bool) uint32 { - var _31to23 uint32 - switch op { - case aluOpAnd: - _31to23 = 0b00_100100 - case aluOpOrr: - _31to23 = 0b01_100100 - case aluOpEor: - _31to23 = 0b10_100100 - case aluOpAnds: - _31to23 = 0b11_100100 - default: - panic("BUG") - } - if _64bit { - _31to23 |= 0b1 << 8 - } - immr, imms, N := bitmaskImmediate(imm, _64bit) - return _31to23<<23 | uint32(N)<<22 | uint32(immr)<<16 | uint32(imms)<<10 | rn<<5 | rd -} - -func bitmaskImmediate(c uint64, is64bit bool) (immr, imms, N byte) { - var size uint32 - switch { - case c != c>>32|c<<32: - size = 64 - case c != c>>16|c<<48: - size = 32 - c = uint64(int32(c)) - case c != c>>8|c<<56: - size = 16 - c = uint64(int16(c)) - case c != c>>4|c<<60: - size = 8 - c = uint64(int8(c)) - case c != c>>2|c<<62: - size = 4 - c = uint64(int64(c<<60) >> 60) - default: - size = 2 - c = uint64(int64(c<<62) >> 62) - } - - neg := false - if int64(c) < 0 { - c = ^c - neg = true - } - - onesSize, nonZeroPos := getOnesSequenceSize(c) - if neg { - nonZeroPos = onesSize + nonZeroPos - onesSize = size - onesSize - } - - var mode byte = 32 - if is64bit && size == 64 { - N, mode = 0b1, 64 - } - - immr = byte((size - nonZeroPos) & (size - 1) & uint32(mode-1)) - imms = byte((onesSize - 1) | 63&^(size<<1-1)) - return -} - -func getOnesSequenceSize(x uint64) (size, nonZeroPos uint32) { - // Take 0b00111000 for example: - y := getLowestBit(x) // = 0b0000100 - nonZeroPos = setBitPos(y) // = 2 - size = setBitPos(x+y) - nonZeroPos // = setBitPos(0b0100000) - 2 = 5 - 2 = 3 - return -} - -func setBitPos(x uint64) (ret uint32) { - for ; ; ret++ { - if x == 0b1 { - break - } - x = x >> 1 - } - return -} - -// 
encodeLoadOrStoreExtended encodes store/load instruction as "extended register offset" in Load/store register (register offset): -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Loads-and-Stores?lang=en -func encodeLoadOrStoreExtended(_22to32 uint32, rn, rm, rt uint32, scaled bool, extOp extendOp) uint32 { - var option uint32 - switch extOp { - case extendOpUXTW: - option = 0b010 - case extendOpSXTW: - option = 0b110 - case extendOpNone: - option = 0b111 - default: - panic("BUG") - } - var s uint32 - if scaled { - s = 0b1 - } - return _22to32<<22 | 0b1<<21 | rm<<16 | option<<13 | s<<12 | 0b10<<10 | rn<<5 | rt -} - -// encodeLoadOrStoreSIMM9 encodes store/load instruction as one of post-index, pre-index or unscaled immediate as in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Loads-and-Stores?lang=en -func encodeLoadOrStoreSIMM9(_22to32, _1011 uint32, rn, rt uint32, imm9 int64) uint32 { - return _22to32<<22 | (uint32(imm9)&0b111111111)<<12 | _1011<<10 | rn<<5 | rt -} - -// encodeFpuRRR encodes as single or double precision (depending on `_64bit`) of Floating-point data-processing (2 source) in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en -func encodeFpuRRR(op fpuBinOp, rd, rn, rm uint32, _64bit bool) (ret uint32) { - // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADD--vector--Add-vectors--scalar--floating-point-and-integer- - var opcode uint32 - switch op { - case fpuBinOpAdd: - opcode = 0b0010 - case fpuBinOpSub: - opcode = 0b0011 - case fpuBinOpMul: - opcode = 0b0000 - case fpuBinOpDiv: - opcode = 0b0001 - case fpuBinOpMax: - opcode = 0b0100 - case fpuBinOpMin: - opcode = 0b0101 - default: - panic("BUG") - } - var ptype uint32 - if _64bit { - ptype = 0b01 - } - return 0b1111<<25 | ptype<<22 | 0b1<<21 | rm<<16 | opcode<<12 | 0b1<<11 | rn<<5 | rd -} - -// encodeAluRRImm12 encodes as Add/subtract (immediate) in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en -func encodeAluRRImm12(op aluOp, rd, rn uint32, imm12 uint16, shiftBit byte, _64bit bool) uint32 { - var _31to24 uint32 - switch op { - case aluOpAdd: - _31to24 = 0b00_10001 - case aluOpAddS: - _31to24 = 0b01_10001 - case aluOpSub: - _31to24 = 0b10_10001 - case aluOpSubS: - _31to24 = 0b11_10001 - default: - panic("BUG") - } - if _64bit { - _31to24 |= 0b1 << 7 - } - return _31to24<<24 | uint32(shiftBit)<<22 | uint32(imm12&0b111111111111)<<10 | rn<<5 | rd -} - -// encodeAluRRR encodes as Data Processing (shifted register), depending on aluOp. -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift -func encodeAluRRRShift(op aluOp, rd, rn, rm, amount uint32, shiftOp shiftOp, _64bit bool) uint32 { - var _31to24 uint32 - var opc, n uint32 - switch op { - case aluOpAdd: - _31to24 = 0b00001011 - case aluOpAddS: - _31to24 = 0b00101011 - case aluOpSub: - _31to24 = 0b01001011 - case aluOpSubS: - _31to24 = 0b01101011 - case aluOpAnd, aluOpOrr, aluOpEor, aluOpAnds: - // "Logical (shifted register)". 
- switch op { - case aluOpAnd: - // all zeros - case aluOpOrr: - opc = 0b01 - case aluOpEor: - opc = 0b10 - case aluOpAnds: - opc = 0b11 - } - _31to24 = 0b000_01010 - default: - panic(op.String()) - } - - if _64bit { - _31to24 |= 0b1 << 7 - } - - var shift uint32 - switch shiftOp { - case shiftOpLSL: - shift = 0b00 - case shiftOpLSR: - shift = 0b01 - case shiftOpASR: - shift = 0b10 - default: - panic(shiftOp.String()) - } - return opc<<29 | n<<21 | _31to24<<24 | shift<<22 | rm<<16 | (amount << 10) | (rn << 5) | rd -} - -// "Add/subtract (extended register)" in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_ext -func encodeAluRRRExtend(ao aluOp, rd, rn, rm uint32, extOp extendOp, to byte) uint32 { - var s, op uint32 - switch ao { - case aluOpAdd: - op = 0b0 - case aluOpAddS: - op, s = 0b0, 0b1 - case aluOpSub: - op = 0b1 - case aluOpSubS: - op, s = 0b1, 0b1 - default: - panic("BUG: extended register operand can be used only for add/sub") - } - - var sf uint32 - if to == 64 { - sf = 0b1 - } - - var option uint32 - switch extOp { - case extendOpUXTB: - option = 0b000 - case extendOpUXTH: - option = 0b001 - case extendOpUXTW: - option = 0b010 - case extendOpSXTB: - option = 0b100 - case extendOpSXTH: - option = 0b101 - case extendOpSXTW: - option = 0b110 - case extendOpSXTX, extendOpUXTX: - panic(fmt.Sprintf("%s is essentially noop, and should be handled much earlier than encoding", extOp.String())) - } - return sf<<31 | op<<30 | s<<29 | 0b1011001<<21 | rm<<16 | option<<13 | rn<<5 | rd -} - -// encodeAluRRR encodes as Data Processing (register), depending on aluOp. -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en -func encodeAluRRR(op aluOp, rd, rn, rm uint32, _64bit, isRnSp bool) uint32 { - var _31to21, _15to10 uint32 - switch op { - case aluOpAdd: - if isRnSp { - // "Extended register" with UXTW. - _31to21 = 0b00001011_001 - _15to10 = 0b011000 - } else { - // "Shifted register" with shift = 0 - _31to21 = 0b00001011_000 - } - case aluOpAddS: - if isRnSp { - panic("TODO") - } - // "Shifted register" with shift = 0 - _31to21 = 0b00101011_000 - case aluOpSub: - if isRnSp { - // "Extended register" with UXTW. - _31to21 = 0b01001011_001 - _15to10 = 0b011000 - } else { - // "Shifted register" with shift = 0 - _31to21 = 0b01001011_000 - } - case aluOpSubS: - if isRnSp { - panic("TODO") - } - // "Shifted register" with shift = 0 - _31to21 = 0b01101011_000 - case aluOpAnd, aluOpOrr, aluOpOrn, aluOpEor, aluOpAnds: - // "Logical (shifted register)". - var opc, n uint32 - switch op { - case aluOpAnd: - // all zeros - case aluOpOrr: - opc = 0b01 - case aluOpOrn: - opc = 0b01 - n = 1 - case aluOpEor: - opc = 0b10 - case aluOpAnds: - opc = 0b11 - } - _31to21 = 0b000_01010_000 | opc<<8 | n - case aluOpLsl, aluOpAsr, aluOpLsr, aluOpRotR: - // "Data-processing (2 source)". - _31to21 = 0b00011010_110 - switch op { - case aluOpLsl: - _15to10 = 0b001000 - case aluOpLsr: - _15to10 = 0b001001 - case aluOpAsr: - _15to10 = 0b001010 - case aluOpRotR: - _15to10 = 0b001011 - } - case aluOpSDiv: - // "Data-processing (2 source)". - _31to21 = 0b11010110 - _15to10 = 0b000011 - case aluOpUDiv: - // "Data-processing (2 source)". 
- _31to21 = 0b11010110 - _15to10 = 0b000010 - default: - panic(op.String()) - } - if _64bit { - _31to21 |= 0b1 << 10 - } - return _31to21<<21 | rm<<16 | (_15to10 << 10) | (rn << 5) | rd -} - -// encodeLogicalShiftedRegister encodes as Logical (shifted register) in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en -func encodeLogicalShiftedRegister(sf_opc uint32, shift_N uint32, rm uint32, imm6 uint32, rn, rd uint32) (ret uint32) { - ret = sf_opc << 29 - ret |= 0b01010 << 24 - ret |= shift_N << 21 - ret |= rm << 16 - ret |= imm6 << 10 - ret |= rn << 5 - ret |= rd - return -} - -// encodeAddSubtractImmediate encodes as Add/subtract (immediate) in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en -func encodeAddSubtractImmediate(sf_op_s uint32, sh uint32, imm12 uint32, rn, rd uint32) (ret uint32) { - ret = sf_op_s << 29 - ret |= 0b100010 << 23 - ret |= sh << 22 - ret |= imm12 << 10 - ret |= rn << 5 - ret |= rd - return -} - -// encodePreOrPostIndexLoadStorePair64 encodes as Load/store pair (pre/post-indexed) in -// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers- -// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers- -func encodePreOrPostIndexLoadStorePair64(pre bool, load bool, rn, rt, rt2 uint32, imm7 int64) (ret uint32) { - if imm7%8 != 0 { - panic("imm7 for pair load/store must be a multiple of 8") - } - imm7 /= 8 - ret = rt - ret |= rn << 5 - ret |= rt2 << 10 - ret |= (uint32(imm7) & 0b1111111) << 15 - if load { - ret |= 0b1 << 22 - } - ret |= 0b101010001 << 23 - if pre { - ret |= 0b1 << 24 - } - return -} - -// encodeUnconditionalBranch encodes as B or BL instructions: -// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch- -// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link- -func encodeUnconditionalBranch(link bool, imm26 int64) (ret uint32) { - if imm26%4 != 0 { - panic("imm26 for branch must be a multiple of 4") - } - imm26 /= 4 - ret = uint32(imm26 & 0b11_11111111_11111111_11111111) - ret |= 0b101 << 26 - if link { - ret |= 0b1 << 31 - } - return -} - -// encodeCBZCBNZ encodes as either CBZ or CBNZ: -// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CBZ--Compare-and-Branch-on-Zero- -// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CBNZ--Compare-and-Branch-on-Nonzero- -func encodeCBZCBNZ(rt uint32, nz bool, imm19 uint32, _64bit bool) (ret uint32) { - ret = rt - ret |= imm19 << 5 - if nz { - ret |= 1 << 24 - } - ret |= 0b11010 << 25 - if _64bit { - ret |= 1 << 31 - } - return -} - -// encodeMoveWideImmediate encodes as either MOVZ, MOVN or MOVK, as Move wide (immediate) in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en -// -// "shift" must have been divided by 16 at this point. 
-func encodeMoveWideImmediate(opc uint32, rd uint32, imm uint64, shift, _64bit uint32) (ret uint32) { - ret = rd - ret |= uint32(imm&0xffff) << 5 - ret |= (shift) << 21 - ret |= 0b100101 << 23 - ret |= opc << 29 - ret |= _64bit << 31 - return -} - -// encodeAluRRImm encodes as "Bitfield" in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en#log_imm -func encodeAluRRImm(op aluOp, rd, rn, amount, _64bit uint32) uint32 { - var opc uint32 - var immr, imms uint32 - switch op { - case aluOpLsl: - // LSL (immediate) is an alias for UBFM. - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/UBFM--Unsigned-Bitfield-Move-?lang=en - opc = 0b10 - if amount == 0 { - // This can be encoded as NOP, but we don't do it for consistency: lsr xn, xm, #0 - immr = 0 - if _64bit == 1 { - imms = 0b111111 - } else { - imms = 0b11111 - } - } else { - if _64bit == 1 { - immr = 64 - amount - } else { - immr = (32 - amount) & 0b11111 - } - imms = immr - 1 - } - case aluOpLsr: - // LSR (immediate) is an alias for UBFM. - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LSR--immediate---Logical-Shift-Right--immediate---an-alias-of-UBFM-?lang=en - opc = 0b10 - imms, immr = 0b011111|_64bit<<5, amount - case aluOpAsr: - // ASR (immediate) is an alias for SBFM. - // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/SBFM--Signed-Bitfield-Move-?lang=en - opc = 0b00 - imms, immr = 0b011111|_64bit<<5, amount - default: - panic(op.String()) - } - return _64bit<<31 | opc<<29 | 0b100110<<23 | _64bit<<22 | immr<<16 | imms<<10 | rn<<5 | rd -} - -// encodeVecLanes encodes as Data Processing (Advanced SIMD across lanes) depending on vecOp in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en -func encodeVecLanes(op vecOp, rd uint32, rn uint32, arr vecArrangement) uint32 { - var u, q, size, opcode uint32 - switch arr { - case vecArrangement8B: - q, size = 0b0, 0b00 - case vecArrangement16B: - q, size = 0b1, 0b00 - case vecArrangement4H: - q, size = 0, 0b01 - case vecArrangement8H: - q, size = 1, 0b01 - case vecArrangement4S: - q, size = 1, 0b10 - default: - panic("unsupported arrangement: " + arr.String()) - } - switch op { - case vecOpUaddlv: - u, opcode = 1, 0b00011 - case vecOpUminv: - u, opcode = 1, 0b11010 - case vecOpAddv: - u, opcode = 0, 0b11011 - default: - panic("unsupported or illegal vecOp: " + op.String()) - } - return q<<30 | u<<29 | 0b1110<<24 | size<<22 | 0b11000<<17 | opcode<<12 | 0b10<<10 | rn<<5 | rd -} - -// encodeVecLanes encodes as Data Processing (Advanced SIMD scalar shift by immediate) depending on vecOp in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en -func encodeVecShiftImm(op vecOp, rd uint32, rn, amount uint32, arr vecArrangement) uint32 { - var u, q, immh, immb, opcode uint32 - switch op { - case vecOpSshll: - u, opcode = 0b0, 0b10100 - case vecOpUshll: - u, opcode = 0b1, 0b10100 - case vecOpSshr: - u, opcode = 0, 0b00000 - default: - panic("unsupported or illegal vecOp: " + op.String()) - } - switch arr { - case vecArrangement16B: - q = 0b1 - fallthrough - case vecArrangement8B: - immh = 0b0001 - immb = 8 - uint32(amount&0b111) - case vecArrangement8H: - q = 0b1 - fallthrough - case vecArrangement4H: - v := 16 - uint32(amount&0b1111) - immb = v & 0b111 - immh = 0b0010 | (v >> 
3) - case vecArrangement4S: - q = 0b1 - fallthrough - case vecArrangement2S: - v := 32 - uint32(amount&0b11111) - immb = v & 0b111 - immh = 0b0100 | (v >> 3) - case vecArrangement2D: - q = 0b1 - v := 64 - uint32(amount&0b111111) - immb = v & 0b111 - immh = 0b1000 | (v >> 3) - default: - panic("unsupported arrangement: " + arr.String()) - } - return q<<30 | u<<29 | 0b011110<<23 | immh<<19 | immb<<16 | 0b000001<<10 | opcode<<11 | 0b1<<10 | rn<<5 | rd -} - -// encodeVecTbl encodes as Data Processing (Advanced SIMD table lookup) in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp -// -// Note: tblOp may encode tbl1, tbl2... in the future. Currently, it is ignored. -func encodeVecTbl(nregs, rd, rn, rm uint32, arr vecArrangement) uint32 { - var q, op2, len, op uint32 - - switch nregs { - case 1: - // tbl: single-register - len = 0b00 - case 2: - // tbl2: 2-register table - len = 0b01 - default: - panic(fmt.Sprintf("unsupported number or registers %d", nregs)) - } - switch arr { - case vecArrangement8B: - q = 0b0 - case vecArrangement16B: - q = 0b1 - default: - panic("unsupported arrangement: " + arr.String()) - } - - return q<<30 | 0b001110<<24 | op2<<22 | rm<<16 | len<<13 | op<<12 | rn<<5 | rd -} - -// encodeVecMisc encodes as Data Processing (Advanced SIMD two-register miscellaneous) depending on vecOp in -// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp -func encodeAdvancedSIMDTwoMisc(op vecOp, rd, rn uint32, arr vecArrangement) uint32 { - var q, u, size, opcode uint32 - switch op { - case vecOpCnt: - opcode = 0b00101 - switch arr { - case vecArrangement8B: - q, size = 0b0, 0b00 - case vecArrangement16B: - q, size = 0b1, 0b00 - default: - panic("unsupported arrangement: " + arr.String()) - } - case vecOpCmeq0: - if arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - opcode = 0b01001 - size, q = arrToSizeQEncoded(arr) - case vecOpNot: - u = 1 - opcode = 0b00101 - switch arr { - case vecArrangement8B: - q, size = 0b0, 0b00 - case vecArrangement16B: - q, size = 0b1, 0b00 - default: - panic("unsupported arrangement: " + arr.String()) - } - case vecOpAbs: - if arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - opcode = 0b01011 - u = 0b0 - size, q = arrToSizeQEncoded(arr) - case vecOpNeg: - if arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - opcode = 0b01011 - u = 0b1 - size, q = arrToSizeQEncoded(arr) - case vecOpFabs: - if arr < vecArrangement2S || arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - opcode = 0b01111 - u = 0b0 - size, q = arrToSizeQEncoded(arr) - case vecOpFneg: - if arr < vecArrangement2S || arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - opcode = 0b01111 - u = 0b1 - size, q = arrToSizeQEncoded(arr) - case vecOpFrintm: - u = 0b0 - opcode = 0b11001 - switch arr { - case vecArrangement2S: - q, size = 0b0, 0b00 - case vecArrangement4S: - q, size = 0b1, 0b00 - case vecArrangement2D: - q, size = 0b1, 0b01 - default: - panic("unsupported arrangement: " + arr.String()) - } - case vecOpFrintn: - u = 0b0 - opcode = 0b11000 - switch arr { - case vecArrangement2S: - q, size = 0b0, 0b00 - case vecArrangement4S: - q, size = 0b1, 0b00 - case vecArrangement2D: - q, size = 0b1, 0b01 - default: - panic("unsupported arrangement: 
" + arr.String()) - } - case vecOpFrintp: - u = 0b0 - opcode = 0b11000 - if arr < vecArrangement2S || arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - size, q = arrToSizeQEncoded(arr) - case vecOpFrintz: - u = 0b0 - opcode = 0b11001 - if arr < vecArrangement2S || arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - size, q = arrToSizeQEncoded(arr) - case vecOpFsqrt: - if arr < vecArrangement2S || arr == vecArrangement1D { - panic("unsupported arrangement: " + arr.String()) - } - opcode = 0b11111 - u = 0b1 - size, q = arrToSizeQEncoded(arr) - case vecOpFcvtl: - opcode = 0b10111 - u = 0b0 - switch arr { - case vecArrangement2S: - size, q = 0b01, 0b0 - case vecArrangement4H: - size, q = 0b00, 0b0 - default: - panic("unsupported arrangement: " + arr.String()) - } - case vecOpFcvtn: - opcode = 0b10110 - u = 0b0 - switch arr { - case vecArrangement2S: - size, q = 0b01, 0b0 - case vecArrangement4H: - size, q = 0b00, 0b0 - default: - panic("unsupported arrangement: " + arr.String()) - } - case vecOpFcvtzs: - opcode = 0b11011 - u = 0b0 - switch arr { - case vecArrangement2S: - q, size = 0b0, 0b10 - case vecArrangement4S: - q, size = 0b1, 0b10 - case vecArrangement2D: - q, size = 0b1, 0b11 - default: - panic("unsupported arrangement: " + arr.String()) - } - case vecOpFcvtzu: - opcode = 0b11011 - u = 0b1 - switch arr { - case vecArrangement2S: - q, size = 0b0, 0b10 - case vecArrangement4S: - q, size = 0b1, 0b10 - case vecArrangement2D: - q, size = 0b1, 0b11 - default: - panic("unsupported arrangement: " + arr.String()) - } - case vecOpScvtf: - opcode = 0b11101 - u = 0b0 - switch arr { - case vecArrangement4S: - q, size = 0b1, 0b00 - case vecArrangement2S: - q, size = 0b0, 0b00 - case vecArrangement2D: - q, size = 0b1, 0b01 - default: - panic("unsupported arrangement: " + arr.String()) - } - case vecOpUcvtf: - opcode = 0b11101 - u = 0b1 - switch arr { - case vecArrangement4S: - q, size = 0b1, 0b00 - case vecArrangement2S: - q, size = 0b0, 0b00 - case vecArrangement2D: - q, size = 0b1, 0b01 - default: - panic("unsupported arrangement: " + arr.String()) - } - case vecOpSqxtn: - // When q == 1 it encodes sqxtn2 (operates on upper 64 bits). - opcode = 0b10100 - u = 0b0 - if arr > vecArrangement4S { - panic("unsupported arrangement: " + arr.String()) - } - size, q = arrToSizeQEncoded(arr) - case vecOpUqxtn: - // When q == 1 it encodes uqxtn2 (operates on upper 64 bits). - opcode = 0b10100 - u = 0b1 - if arr > vecArrangement4S { - panic("unsupported arrangement: " + arr.String()) - } - size, q = arrToSizeQEncoded(arr) - case vecOpSqxtun: - // When q == 1 it encodes sqxtun2 (operates on upper 64 bits). 
- opcode = 0b10010 // 0b10100 - u = 0b1 - if arr > vecArrangement4S { - panic("unsupported arrangement: " + arr.String()) - } - size, q = arrToSizeQEncoded(arr) - case vecOpRev64: - opcode = 0b00000 - size, q = arrToSizeQEncoded(arr) - case vecOpXtn: - u = 0b0 - opcode = 0b10010 - size, q = arrToSizeQEncoded(arr) - case vecOpShll: - u = 0b1 - opcode = 0b10011 - switch arr { - case vecArrangement8B: - q, size = 0b0, 0b00 - case vecArrangement4H: - q, size = 0b0, 0b01 - case vecArrangement2S: - q, size = 0b0, 0b10 - default: - panic("unsupported arrangement: " + arr.String()) - } - default: - panic("unsupported or illegal vecOp: " + op.String()) - } - return q<<30 | u<<29 | 0b01110<<24 | size<<22 | 0b10000<<17 | opcode<<12 | 0b10<<10 | rn<<5 | rd -} - -// brTableSequenceOffsetTableBegin is the offset inside the brTableSequence where the table begins after 4 instructions -const brTableSequenceOffsetTableBegin = 16 - -func encodeBrTableSequence(c backend.Compiler, index regalloc.VReg, targets []uint32) { - tmpRegNumber := regNumberInEncoding[tmp] - indexNumber := regNumberInEncoding[index.RealReg()] - - // adr tmpReg, PC+16 (PC+16 is the address of the first label offset) - // ldrsw index, [tmpReg, index, UXTW 2] ;; index = int64(*(tmpReg + index*8)) - // add tmpReg, tmpReg, index - // br tmpReg - // [offset_to_l1, offset_to_l2, ..., offset_to_lN] - c.Emit4Bytes(encodeAdr(tmpRegNumber, 16)) - c.Emit4Bytes(encodeLoadOrStore(sLoad32, indexNumber, - addressMode{kind: addressModeKindRegScaledExtended, rn: tmpRegVReg, rm: index, extOp: extendOpUXTW}, - )) - c.Emit4Bytes(encodeAluRRR(aluOpAdd, tmpRegNumber, tmpRegNumber, indexNumber, true, false)) - c.Emit4Bytes(encodeUnconditionalBranchReg(tmpRegNumber, false)) - - // Offsets are resolved in ResolveRelativeAddress phase. - for _, offset := range targets { - if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable { - // Inlined offset tables cannot be disassembled properly, so pad dummy instructions to make the debugging easier. - c.Emit4Bytes(dummyInstruction) - } else { - c.Emit4Bytes(offset) - } - } -} - -// encodeExitSequence matches the implementation detail of functionABI.emitGoEntryPreamble. -func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) { - // Restore the FP, SP and LR, and return to the Go code: - // ldr lr, [ctxReg, #GoReturnAddress] - // ldr fp, [ctxReg, #OriginalFramePointer] - // ldr tmp, [ctxReg, #OriginalStackPointer] - // mov sp, tmp ;; sp cannot be str'ed directly. - // ret ;; --> return to the Go code - - var ctxEvicted bool - if ctx := ctxReg.RealReg(); ctx == fp || ctx == lr { - // In order to avoid overwriting the context register, we move ctxReg to tmp. 
- c.Emit4Bytes(encodeMov64(regNumberInEncoding[tmp], regNumberInEncoding[ctx], false, false)) - ctxReg = tmpRegVReg - ctxEvicted = true - } - - restoreLr := encodeLoadOrStore( - uLoad64, - regNumberInEncoding[lr], - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: ctxReg, - imm: wazevoapi.ExecutionContextOffsetGoReturnAddress.I64(), - }, - ) - - restoreFp := encodeLoadOrStore( - uLoad64, - regNumberInEncoding[fp], - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: ctxReg, - imm: wazevoapi.ExecutionContextOffsetOriginalFramePointer.I64(), - }, - ) - - restoreSpToTmp := encodeLoadOrStore( - uLoad64, - regNumberInEncoding[tmp], - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: ctxReg, - imm: wazevoapi.ExecutionContextOffsetOriginalStackPointer.I64(), - }, - ) - - movTmpToSp := encodeAddSubtractImmediate(0b100, 0, 0, - regNumberInEncoding[tmp], regNumberInEncoding[sp]) - - c.Emit4Bytes(restoreFp) - c.Emit4Bytes(restoreLr) - c.Emit4Bytes(restoreSpToTmp) - c.Emit4Bytes(movTmpToSp) - c.Emit4Bytes(encodeRet()) - if !ctxEvicted { - // In order to have the fixed-length exit sequence, we need to padd the binary. - // Since this will never be reached, we insert a dummy instruction. - c.Emit4Bytes(dummyInstruction) - } -} - -func encodeRet() uint32 { - // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/RET--Return-from-subroutine-?lang=en - return 0b1101011001011111<<16 | regNumberInEncoding[lr]<<5 -} - -func encodeAtomicRmw(op atomicRmwOp, rs, rt, rn uint32, size uint32) uint32 { - var _31to21, _15to10, sz uint32 - - switch size { - case 8: - sz = 0b11 - case 4: - sz = 0b10 - case 2: - sz = 0b01 - case 1: - sz = 0b00 - } - - _31to21 = 0b00111000_111 | sz<<9 - - switch op { - case atomicRmwOpAdd: - _15to10 = 0b000000 - case atomicRmwOpClr: - _15to10 = 0b000100 - case atomicRmwOpSet: - _15to10 = 0b001100 - case atomicRmwOpEor: - _15to10 = 0b001000 - case atomicRmwOpSwp: - _15to10 = 0b100000 - } - - return _31to21<<21 | rs<<16 | _15to10<<10 | rn<<5 | rt -} - -func encodeAtomicCas(rs, rt, rn uint32, size uint32) uint32 { - var _31to21, _15to10, sz uint32 - - switch size { - case 8: - sz = 0b11 - case 4: - sz = 0b10 - case 2: - sz = 0b01 - case 1: - sz = 0b00 - } - - _31to21 = 0b00001000_111 | sz<<9 - _15to10 = 0b111111 - - return _31to21<<21 | rs<<16 | _15to10<<10 | rn<<5 | rt -} - -func encodeAtomicLoadStore(rn, rt, size, l uint32) uint32 { - var _31to21, _20to16, _15to10, sz uint32 - - switch size { - case 8: - sz = 0b11 - case 4: - sz = 0b10 - case 2: - sz = 0b01 - case 1: - sz = 0b00 - } - - _31to21 = 0b00001000_100 | sz<<9 | l<<1 - _20to16 = 0b11111 - _15to10 = 0b111111 - - return _31to21<<21 | _20to16<<16 | _15to10<<10 | rn<<5 | rt -} - -func encodeDMB() uint32 { - return 0b11010101000000110011101110111111 -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go deleted file mode 100644 index 6c6824fb0..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go +++ /dev/null @@ -1,301 +0,0 @@ -package arm64 - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -// lowerConstant allocates a new VReg and inserts the instruction to load the constant value. 
-func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) { - val := instr.Return() - valType := val.Type() - - vr = m.compiler.AllocateVReg(valType) - v := instr.ConstantVal() - m.insertLoadConstant(v, valType, vr) - return -} - -// InsertLoadConstantBlockArg implements backend.Machine. -func (m *machine) InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) { - val := instr.Return() - valType := val.Type() - v := instr.ConstantVal() - load := m.allocateInstr() - load.asLoadConstBlockArg(v, valType, vr) - m.insert(load) -} - -func (m *machine) lowerLoadConstantBlockArgAfterRegAlloc(i *instruction) { - v, typ, dst := i.loadConstBlockArgData() - m.insertLoadConstant(v, typ, dst) -} - -func (m *machine) insertLoadConstant(v uint64, valType ssa.Type, vr regalloc.VReg) { - if valType.Bits() < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc. - v = v & ((1 << valType.Bits()) - 1) - } - - switch valType { - case ssa.TypeF32: - loadF := m.allocateInstr() - loadF.asLoadFpuConst32(vr, v) - m.insert(loadF) - case ssa.TypeF64: - loadF := m.allocateInstr() - loadF.asLoadFpuConst64(vr, v) - m.insert(loadF) - case ssa.TypeI32: - if v == 0 { - m.InsertMove(vr, xzrVReg, ssa.TypeI32) - } else { - m.lowerConstantI32(vr, int32(v)) - } - case ssa.TypeI64: - if v == 0 { - m.InsertMove(vr, xzrVReg, ssa.TypeI64) - } else { - m.lowerConstantI64(vr, int64(v)) - } - default: - panic("TODO") - } -} - -// The following logics are based on the old asm/arm64 package. -// https://github.com/tetratelabs/wazero/blob/39f2ff23a6d609e10c82b9cc0b981f6de5b87a9c/internal/asm/arm64/impl.go - -func (m *machine) lowerConstantI32(dst regalloc.VReg, c int32) { - // Following the logic here: - // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1637 - ic := int64(uint32(c)) - if ic >= 0 && (ic <= 0xfff || (ic&0xfff) == 0 && (uint64(ic>>12) <= 0xfff)) { - if isBitMaskImmediate(uint64(c), false) { - m.lowerConstViaBitMaskImmediate(uint64(uint32(c)), dst, false) - return - } - } - - if t := const16bitAligned(int64(uint32(c))); t >= 0 { - // If the const can fit within 16-bit alignment, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000 - // We could load it into temporary with movk. - m.insertMOVZ(dst, uint64(uint32(c)>>(16*t)), t, false) - } else if t := const16bitAligned(int64(^c)); t >= 0 { - // Also, if the inverse of the const can fit within 16-bit range, do the same ^^. - m.insertMOVN(dst, uint64(^c>>(16*t)), t, false) - } else if isBitMaskImmediate(uint64(uint32(c)), false) { - m.lowerConstViaBitMaskImmediate(uint64(c), dst, false) - } else { - // Otherwise, we use MOVZ and MOVK to load it. - c16 := uint16(c) - m.insertMOVZ(dst, uint64(c16), 0, false) - c16 = uint16(uint32(c) >> 16) - m.insertMOVK(dst, uint64(c16), 1, false) - } -} - -func (m *machine) lowerConstantI64(dst regalloc.VReg, c int64) { - // Following the logic here: - // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1798-L1852 - if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) { - if isBitMaskImmediate(uint64(c), true) { - m.lowerConstViaBitMaskImmediate(uint64(c), dst, true) - return - } - } - - if t := const16bitAligned(c); t >= 0 { - // If the const can fit within 16-bit alignment, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000 - // We could load it into temporary with movk. 
- m.insertMOVZ(dst, uint64(c)>>(16*t), t, true) - } else if t := const16bitAligned(^c); t >= 0 { - // Also, if the inverse of the const can fit within 16-bit range, do the same ^^. - m.insertMOVN(dst, uint64(^c)>>(16*t), t, true) - } else if isBitMaskImmediate(uint64(c), true) { - m.lowerConstViaBitMaskImmediate(uint64(c), dst, true) - } else { - m.load64bitConst(c, dst) - } -} - -func (m *machine) lowerConstViaBitMaskImmediate(c uint64, dst regalloc.VReg, b64 bool) { - instr := m.allocateInstr() - instr.asALUBitmaskImm(aluOpOrr, dst, xzrVReg, c, b64) - m.insert(instr) -} - -// isBitMaskImmediate determines if the value can be encoded as "bitmask immediate". -// -// Such an immediate is a 32-bit or 64-bit pattern viewed as a vector of identical elements of size e = 2, 4, 8, 16, 32, or 64 bits. -// Each element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by 0 to e-1 bits. -// -// See https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/MOV--bitmask-immediate- -func isBitMaskImmediate(x uint64, _64 bool) bool { - // All zeros and ones are not "bitmask immediate" by definition. - if x == 0 || (_64 && x == 0xffff_ffff_ffff_ffff) || (!_64 && x == 0xffff_ffff) { - return false - } - - switch { - case x != x>>32|x<<32: - // e = 64 - case x != x>>16|x<<48: - // e = 32 (x == x>>32|x<<32). - // e.g. 0x00ff_ff00_00ff_ff00 - x = uint64(int32(x)) - case x != x>>8|x<<56: - // e = 16 (x == x>>16|x<<48). - // e.g. 0x00ff_00ff_00ff_00ff - x = uint64(int16(x)) - case x != x>>4|x<<60: - // e = 8 (x == x>>8|x<<56). - // e.g. 0x0f0f_0f0f_0f0f_0f0f - x = uint64(int8(x)) - default: - // e = 4 or 2. - return true - } - return sequenceOfSetbits(x) || sequenceOfSetbits(^x) -} - -// sequenceOfSetbits returns true if the number's binary representation is a single contiguous sequence of set bits (1s). -// For example: 0b1110 -> true, 0b1010 -> false -func sequenceOfSetbits(x uint64) bool { - y := getLowestBit(x) - // If x is a contiguous sequence of set bits, this results in a number - // with only one set bit (i.e. a power of two). - y += x - return (y-1)&y == 0 -} - -func getLowestBit(x uint64) uint64 { - return x & (^x + 1) -} - -// const16bitAligned checks whether the value fits in a single 16-bit aligned halfword. -// If so, it returns the shift amount divided by 16, and otherwise -1. -func const16bitAligned(v int64) (ret int) { - ret = -1 - for s := 0; s < 64; s += 16 { - if (uint64(v) &^ (uint64(0xffff) << uint(s))) == 0 { - ret = s / 16 - break - } - } - return -} - -// load64bitConst loads a 64-bit constant into the register, following the same logic to decide how to load large 64-bit -// consts as in the Go assembler. -// -// See https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6632-L6759 -func (m *machine) load64bitConst(c int64, dst regalloc.VReg) { - var bits [4]uint64 - var zeros, negs int - for i := 0; i < 4; i++ { - bits[i] = uint64(c) >> uint(i*16) & 0xffff - if v := bits[i]; v == 0 { - zeros++ - } else if v == 0xffff { - negs++ - } - } - - if zeros == 3 { - // one MOVZ instruction. - for i, v := range bits { - if v != 0 { - m.insertMOVZ(dst, v, i, true) - } - } - } else if negs == 3 { - // one MOVN instruction. - for i, v := range bits { - if v != 0xffff { - v = ^v - m.insertMOVN(dst, v, i, true) - } - } - } else if zeros == 2 { - // one MOVZ then one MOVK. - var movz bool - for i, v := range bits { - if !movz && v != 0 { // MOVZ. 
- m.insertMOVZ(dst, v, i, true) - movz = true - } else if v != 0 { - m.insertMOVK(dst, v, i, true) - } - } - - } else if negs == 2 { - // one MOVN then one MOVK. - var movn bool - for i, v := range bits { // Emit MOVN. - if !movn && v != 0xffff { - v = ^v - // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN - m.insertMOVN(dst, v, i, true) - movn = true - } else if v != 0xffff { - m.insertMOVK(dst, v, i, true) - } - } - - } else if zeros == 1 { - // one MOVZ then two MOVK. - var movz bool - for i, v := range bits { - if !movz && v != 0 { // MOVZ. - m.insertMOVZ(dst, v, i, true) - movz = true - } else if v != 0 { - m.insertMOVK(dst, v, i, true) - } - } - - } else if negs == 1 { - // one MOVN then two MOVK. - var movn bool - for i, v := range bits { // Emit MOVN. - if !movn && v != 0xffff { - v = ^v - // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN - m.insertMOVN(dst, v, i, true) - movn = true - } else if v != 0xffff { - m.insertMOVK(dst, v, i, true) - } - } - - } else { - // one MOVZ then up to three MOVK. - var movz bool - for i, v := range bits { - if !movz && v != 0 { // MOVZ. - m.insertMOVZ(dst, v, i, true) - movz = true - } else if v != 0 { - m.insertMOVK(dst, v, i, true) - } - } - } -} - -func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) { - instr := m.allocateInstr() - instr.asMOVZ(dst, v, uint32(shift), dst64) - m.insert(instr) -} - -func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) { - instr := m.allocateInstr() - instr.asMOVK(dst, v, uint32(shift), dst64) - m.insert(instr) -} - -func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) { - instr := m.allocateInstr() - instr.asMOVN(dst, v, uint32(shift), dst64) - m.insert(instr) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go deleted file mode 100644 index f9df356c0..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ /dev/null @@ -1,2224 +0,0 @@ -package arm64 - -// Files prefixed with lower_instr** do the instruction selection, meaning that they lower SSA-level instructions -// into machine-specific instructions. -// -// Importantly, what the lower** functions do includes tree-matching: they find patterns in the given instruction tree -// and merge multiple instructions where possible. This can be considered "N:1" instruction selection. - -import ( - "fmt" - "math" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -// LowerSingleBranch implements backend.Machine. 
-func (m *machine) LowerSingleBranch(br *ssa.Instruction) { - switch br.Opcode() { - case ssa.OpcodeJump: - _, _, targetBlkID := br.BranchData() - if br.IsFallthroughJump() { - return - } - b := m.allocateInstr() - targetBlk := m.compiler.SSABuilder().BasicBlock(targetBlkID) - if targetBlk.ReturnBlock() { - b.asRet() - } else { - b.asBr(ssaBlockLabel(targetBlk)) - } - m.insert(b) - case ssa.OpcodeBrTable: - m.lowerBrTable(br) - default: - panic("BUG: unexpected branch opcode" + br.Opcode().String()) - } -} - -func (m *machine) lowerBrTable(i *ssa.Instruction) { - index, targetBlockIDs := i.BrTableData() - targetBlockCount := len(targetBlockIDs.View()) - indexOperand := m.getOperand_NR(m.compiler.ValueDefinition(index), extModeNone) - - // Firstly, we have to do the bounds check of the index, and - // set it to the default target (sitting at the end of the list) if it's out of bounds. - - // mov maxIndexReg #maximum_index - // subs wzr, index, maxIndexReg - // csel adjustedIndex, maxIndexReg, index, hs ;; if index is higher or equal than maxIndexReg. - maxIndexReg := m.compiler.AllocateVReg(ssa.TypeI32) - m.lowerConstantI32(maxIndexReg, int32(targetBlockCount-1)) - subs := m.allocateInstr() - subs.asALU(aluOpSubS, xzrVReg, indexOperand, operandNR(maxIndexReg), false) - m.insert(subs) - csel := m.allocateInstr() - adjustedIndex := m.compiler.AllocateVReg(ssa.TypeI32) - csel.asCSel(adjustedIndex, operandNR(maxIndexReg), indexOperand, hs, false) - m.insert(csel) - - brSequence := m.allocateInstr() - - tableIndex := m.addJmpTableTarget(targetBlockIDs) - brSequence.asBrTableSequence(adjustedIndex, tableIndex, targetBlockCount) - m.insert(brSequence) -} - -// LowerConditionalBranch implements backend.Machine. -func (m *machine) LowerConditionalBranch(b *ssa.Instruction) { - cval, args, targetBlkID := b.BranchData() - if len(args) > 0 { - panic(fmt.Sprintf( - "conditional branch shouldn't have args; likely a bug in critical edge splitting: from %s to %s", - m.currentLabelPos.sb, - targetBlkID, - )) - } - - targetBlk := m.compiler.SSABuilder().BasicBlock(targetBlkID) - target := ssaBlockLabel(targetBlk) - cvalDef := m.compiler.ValueDefinition(cval) - - switch { - case m.compiler.MatchInstr(cvalDef, ssa.OpcodeIcmp): // This case, we can use the ALU flag set by SUBS instruction. - cvalInstr := cvalDef.Instr - x, y, c := cvalInstr.IcmpData() - cc, signed := condFlagFromSSAIntegerCmpCond(c), c.Signed() - if b.Opcode() == ssa.OpcodeBrz { - cc = cc.invert() - } - - if !m.tryLowerBandToFlag(x, y) { - m.lowerIcmpToFlag(x, y, signed) - } - cbr := m.allocateInstr() - cbr.asCondBr(cc.asCond(), target, false /* ignored */) - m.insert(cbr) - cvalDef.Instr.MarkLowered() - case m.compiler.MatchInstr(cvalDef, ssa.OpcodeFcmp): // This case we can use the Fpu flag directly. 
- cvalInstr := cvalDef.Instr - x, y, c := cvalInstr.FcmpData() - cc := condFlagFromSSAFloatCmpCond(c) - if b.Opcode() == ssa.OpcodeBrz { - cc = cc.invert() - } - m.lowerFcmpToFlag(x, y) - cbr := m.allocateInstr() - cbr.asCondBr(cc.asCond(), target, false /* ignored */) - m.insert(cbr) - cvalDef.Instr.MarkLowered() - default: - rn := m.getOperand_NR(cvalDef, extModeNone) - var c cond - if b.Opcode() == ssa.OpcodeBrz { - c = registerAsRegZeroCond(rn.nr()) - } else { - c = registerAsRegNotZeroCond(rn.nr()) - } - cbr := m.allocateInstr() - cbr.asCondBr(c, target, false) - m.insert(cbr) - } -} - -func (m *machine) tryLowerBandToFlag(x, y ssa.Value) (ok bool) { - xx := m.compiler.ValueDefinition(x) - yy := m.compiler.ValueDefinition(y) - if xx.IsFromInstr() && xx.Instr.Constant() && xx.Instr.ConstantVal() == 0 { - if m.compiler.MatchInstr(yy, ssa.OpcodeBand) { - bandInstr := yy.Instr - m.lowerBitwiseAluOp(bandInstr, aluOpAnds, true) - ok = true - bandInstr.MarkLowered() - return - } - } - - if yy.IsFromInstr() && yy.Instr.Constant() && yy.Instr.ConstantVal() == 0 { - if m.compiler.MatchInstr(xx, ssa.OpcodeBand) { - bandInstr := xx.Instr - m.lowerBitwiseAluOp(bandInstr, aluOpAnds, true) - ok = true - bandInstr.MarkLowered() - return - } - } - return -} - -// LowerInstr implements backend.Machine. -func (m *machine) LowerInstr(instr *ssa.Instruction) { - if l := instr.SourceOffset(); l.Valid() { - info := m.allocateInstr().asEmitSourceOffsetInfo(l) - m.insert(info) - } - - switch op := instr.Opcode(); op { - case ssa.OpcodeBrz, ssa.OpcodeBrnz, ssa.OpcodeJump, ssa.OpcodeBrTable: - panic("BUG: branching instructions are handled by LowerBranches") - case ssa.OpcodeReturn: - panic("BUG: return must be handled by backend.Compiler") - case ssa.OpcodeIadd, ssa.OpcodeIsub: - m.lowerSubOrAdd(instr, op == ssa.OpcodeIadd) - case ssa.OpcodeFadd, ssa.OpcodeFsub, ssa.OpcodeFmul, ssa.OpcodeFdiv, ssa.OpcodeFmax, ssa.OpcodeFmin: - m.lowerFpuBinOp(instr) - case ssa.OpcodeIconst, ssa.OpcodeF32const, ssa.OpcodeF64const: // Constant instructions are inlined. 
- case ssa.OpcodeExitWithCode: - execCtx, code := instr.ExitWithCodeData() - m.lowerExitWithCode(m.compiler.VRegOf(execCtx), code) - case ssa.OpcodeExitIfTrueWithCode: - execCtx, c, code := instr.ExitIfTrueWithCodeData() - m.lowerExitIfTrueWithCode(m.compiler.VRegOf(execCtx), c, code) - case ssa.OpcodeStore, ssa.OpcodeIstore8, ssa.OpcodeIstore16, ssa.OpcodeIstore32: - m.lowerStore(instr) - case ssa.OpcodeLoad: - dst := instr.Return() - ptr, offset, typ := instr.LoadData() - m.lowerLoad(ptr, offset, typ, dst) - case ssa.OpcodeVZeroExtLoad: - dst := instr.Return() - ptr, offset, typ := instr.VZeroExtLoadData() - m.lowerLoad(ptr, offset, typ, dst) - case ssa.OpcodeUload8, ssa.OpcodeUload16, ssa.OpcodeUload32, ssa.OpcodeSload8, ssa.OpcodeSload16, ssa.OpcodeSload32: - ptr, offset, _ := instr.LoadData() - ret := m.compiler.VRegOf(instr.Return()) - m.lowerExtLoad(op, ptr, offset, ret) - case ssa.OpcodeCall, ssa.OpcodeCallIndirect: - m.lowerCall(instr) - case ssa.OpcodeIcmp: - m.lowerIcmp(instr) - case ssa.OpcodeVIcmp: - m.lowerVIcmp(instr) - case ssa.OpcodeVFcmp: - m.lowerVFcmp(instr) - case ssa.OpcodeVCeil: - m.lowerVecMisc(vecOpFrintp, instr) - case ssa.OpcodeVFloor: - m.lowerVecMisc(vecOpFrintm, instr) - case ssa.OpcodeVTrunc: - m.lowerVecMisc(vecOpFrintz, instr) - case ssa.OpcodeVNearest: - m.lowerVecMisc(vecOpFrintn, instr) - case ssa.OpcodeVMaxPseudo: - m.lowerVMinMaxPseudo(instr, true) - case ssa.OpcodeVMinPseudo: - m.lowerVMinMaxPseudo(instr, false) - case ssa.OpcodeBand: - m.lowerBitwiseAluOp(instr, aluOpAnd, false) - case ssa.OpcodeBor: - m.lowerBitwiseAluOp(instr, aluOpOrr, false) - case ssa.OpcodeBxor: - m.lowerBitwiseAluOp(instr, aluOpEor, false) - case ssa.OpcodeIshl: - m.lowerShifts(instr, extModeNone, aluOpLsl) - case ssa.OpcodeSshr: - if instr.Return().Type().Bits() == 64 { - m.lowerShifts(instr, extModeSignExtend64, aluOpAsr) - } else { - m.lowerShifts(instr, extModeSignExtend32, aluOpAsr) - } - case ssa.OpcodeUshr: - if instr.Return().Type().Bits() == 64 { - m.lowerShifts(instr, extModeZeroExtend64, aluOpLsr) - } else { - m.lowerShifts(instr, extModeZeroExtend32, aluOpLsr) - } - case ssa.OpcodeRotl: - m.lowerRotl(instr) - case ssa.OpcodeRotr: - m.lowerRotr(instr) - case ssa.OpcodeSExtend, ssa.OpcodeUExtend: - from, to, signed := instr.ExtendData() - m.lowerExtend(instr.Arg(), instr.Return(), from, to, signed) - case ssa.OpcodeFcmp: - x, y, c := instr.FcmpData() - m.lowerFcmp(x, y, instr.Return(), c) - case ssa.OpcodeImul: - x, y := instr.Arg2() - result := instr.Return() - m.lowerImul(x, y, result) - case ssa.OpcodeUndefined: - undef := m.allocateInstr() - undef.asUDF() - m.insert(undef) - case ssa.OpcodeSelect: - c, x, y := instr.SelectData() - if x.Type() == ssa.TypeV128 { - rc := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone) - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - m.lowerSelectVec(rc, rn, rm, rd) - } else { - m.lowerSelect(c, x, y, instr.Return()) - } - case ssa.OpcodeClz: - x := instr.Arg() - result := instr.Return() - m.lowerClz(x, result) - case ssa.OpcodeCtz: - x := instr.Arg() - result := instr.Return() - m.lowerCtz(x, result) - case ssa.OpcodePopcnt: - x := instr.Arg() - result := instr.Return() - m.lowerPopcnt(x, result) - case ssa.OpcodeFcvtToSint, ssa.OpcodeFcvtToSintSat: - x, ctx := instr.Arg2() - result := instr.Return() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := 
m.compiler.VRegOf(result) - ctxVReg := m.compiler.VRegOf(ctx) - m.lowerFpuToInt(rd, rn, ctxVReg, true, x.Type() == ssa.TypeF64, - result.Type().Bits() == 64, op == ssa.OpcodeFcvtToSintSat) - case ssa.OpcodeFcvtToUint, ssa.OpcodeFcvtToUintSat: - x, ctx := instr.Arg2() - result := instr.Return() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := m.compiler.VRegOf(result) - ctxVReg := m.compiler.VRegOf(ctx) - m.lowerFpuToInt(rd, rn, ctxVReg, false, x.Type() == ssa.TypeF64, - result.Type().Bits() == 64, op == ssa.OpcodeFcvtToUintSat) - case ssa.OpcodeFcvtFromSint: - x := instr.Arg() - result := instr.Return() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := m.compiler.VRegOf(result) - m.lowerIntToFpu(rd, rn, true, x.Type() == ssa.TypeI64, result.Type().Bits() == 64) - case ssa.OpcodeFcvtFromUint: - x := instr.Arg() - result := instr.Return() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := m.compiler.VRegOf(result) - m.lowerIntToFpu(rd, rn, false, x.Type() == ssa.TypeI64, result.Type().Bits() == 64) - case ssa.OpcodeFdemote: - v := instr.Arg() - rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - cnt := m.allocateInstr() - cnt.asFpuRR(fpuUniOpCvt64To32, rd, rn, false) - m.insert(cnt) - case ssa.OpcodeFpromote: - v := instr.Arg() - rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - cnt := m.allocateInstr() - cnt.asFpuRR(fpuUniOpCvt32To64, rd, rn, true) - m.insert(cnt) - case ssa.OpcodeIreduce: - rn := m.getOperand_NR(m.compiler.ValueDefinition(instr.Arg()), extModeNone) - retVal := instr.Return() - rd := m.compiler.VRegOf(retVal) - - if retVal.Type() != ssa.TypeI32 { - panic("TODO?: Ireduce to non-i32") - } - mov := m.allocateInstr() - mov.asMove32(rd, rn.reg()) - m.insert(mov) - case ssa.OpcodeFneg: - m.lowerFpuUniOp(fpuUniOpNeg, instr.Arg(), instr.Return()) - case ssa.OpcodeSqrt: - m.lowerFpuUniOp(fpuUniOpSqrt, instr.Arg(), instr.Return()) - case ssa.OpcodeCeil: - m.lowerFpuUniOp(fpuUniOpRoundPlus, instr.Arg(), instr.Return()) - case ssa.OpcodeFloor: - m.lowerFpuUniOp(fpuUniOpRoundMinus, instr.Arg(), instr.Return()) - case ssa.OpcodeTrunc: - m.lowerFpuUniOp(fpuUniOpRoundZero, instr.Arg(), instr.Return()) - case ssa.OpcodeNearest: - m.lowerFpuUniOp(fpuUniOpRoundNearest, instr.Arg(), instr.Return()) - case ssa.OpcodeFabs: - m.lowerFpuUniOp(fpuUniOpAbs, instr.Arg(), instr.Return()) - case ssa.OpcodeBitcast: - m.lowerBitcast(instr) - case ssa.OpcodeFcopysign: - x, y := instr.Arg2() - m.lowerFcopysign(x, y, instr.Return()) - case ssa.OpcodeSdiv, ssa.OpcodeUdiv: - x, y, ctx := instr.Arg3() - ctxVReg := m.compiler.VRegOf(ctx) - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - m.lowerIDiv(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSdiv) - case ssa.OpcodeSrem, ssa.OpcodeUrem: - x, y, ctx := instr.Arg3() - ctxVReg := m.compiler.VRegOf(ctx) - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - m.lowerIRem(ctxVReg, rd, rn.nr(), rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSrem) - case ssa.OpcodeVconst: - result := m.compiler.VRegOf(instr.Return()) - lo, hi := instr.VconstData() - v := m.allocateInstr() - v.asLoadFpuConst128(result, lo, hi) - 
m.insert(v) - case ssa.OpcodeVbnot: - x := instr.Arg() - ins := m.allocateInstr() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - ins.asVecMisc(vecOpNot, rd, rn, vecArrangement16B) - m.insert(ins) - case ssa.OpcodeVbxor: - x, y := instr.Arg2() - m.lowerVecRRR(vecOpEOR, x, y, instr.Return(), vecArrangement16B) - case ssa.OpcodeVbor: - x, y := instr.Arg2() - m.lowerVecRRR(vecOpOrr, x, y, instr.Return(), vecArrangement16B) - case ssa.OpcodeVband: - x, y := instr.Arg2() - m.lowerVecRRR(vecOpAnd, x, y, instr.Return(), vecArrangement16B) - case ssa.OpcodeVbandnot: - x, y := instr.Arg2() - m.lowerVecRRR(vecOpBic, x, y, instr.Return(), vecArrangement16B) - case ssa.OpcodeVbitselect: - c, x, y := instr.SelectData() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - creg := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone) - tmp := m.compiler.AllocateVReg(ssa.TypeV128) - - // creg is overwritten by BSL, so we need to move it to the result register before the instruction - // in case when it is used somewhere else. - mov := m.allocateInstr() - mov.asFpuMov128(tmp, creg.nr()) - m.insert(mov) - - ins := m.allocateInstr() - ins.asVecRRRRewrite(vecOpBsl, tmp, rn, rm, vecArrangement16B) - m.insert(ins) - - mov2 := m.allocateInstr() - rd := m.compiler.VRegOf(instr.Return()) - mov2.asFpuMov128(rd, tmp) - m.insert(mov2) - case ssa.OpcodeVanyTrue, ssa.OpcodeVallTrue: - x, lane := instr.ArgWithLane() - var arr vecArrangement - if op == ssa.OpcodeVallTrue { - arr = ssaLaneToArrangement(lane) - } - rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - m.lowerVcheckTrue(op, rm, rd, arr) - case ssa.OpcodeVhighBits: - x, lane := instr.ArgWithLane() - rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - arr := ssaLaneToArrangement(lane) - m.lowerVhighBits(rm, rd, arr) - case ssa.OpcodeVIadd: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpAdd, x, y, instr.Return(), arr) - case ssa.OpcodeExtIaddPairwise: - v, lane, signed := instr.ExtIaddPairwiseData() - vv := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) - - tmpLo, tmpHi := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)), operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - var widen vecOp - if signed { - widen = vecOpSshll - } else { - widen = vecOpUshll - } - - var loArr, hiArr, dstArr vecArrangement - switch lane { - case ssa.VecLaneI8x16: - loArr, hiArr, dstArr = vecArrangement8B, vecArrangement16B, vecArrangement8H - case ssa.VecLaneI16x8: - loArr, hiArr, dstArr = vecArrangement4H, vecArrangement8H, vecArrangement4S - case ssa.VecLaneI32x4: - loArr, hiArr, dstArr = vecArrangement2S, vecArrangement4S, vecArrangement2D - default: - panic("unsupported lane " + lane.String()) - } - - widenLo := m.allocateInstr().asVecShiftImm(widen, tmpLo.nr(), vv, operandShiftImm(0), loArr) - widenHi := m.allocateInstr().asVecShiftImm(widen, tmpHi.nr(), vv, operandShiftImm(0), hiArr) - addp := m.allocateInstr().asVecRRR(vecOpAddp, m.compiler.VRegOf(instr.Return()), tmpLo, tmpHi, dstArr) - m.insert(widenLo) - m.insert(widenHi) - m.insert(addp) - - case ssa.OpcodeVSaddSat: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpSqadd, x, y, instr.Return(), arr) - case ssa.OpcodeVUaddSat: - x, y, lane := 
instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpUqadd, x, y, instr.Return(), arr) - case ssa.OpcodeVIsub: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpSub, x, y, instr.Return(), arr) - case ssa.OpcodeVSsubSat: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpSqsub, x, y, instr.Return(), arr) - case ssa.OpcodeVUsubSat: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpUqsub, x, y, instr.Return(), arr) - case ssa.OpcodeVImin: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpSmin, x, y, instr.Return(), arr) - case ssa.OpcodeVUmin: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpUmin, x, y, instr.Return(), arr) - case ssa.OpcodeVImax: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpSmax, x, y, instr.Return(), arr) - case ssa.OpcodeVUmax: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpUmax, x, y, instr.Return(), arr) - case ssa.OpcodeVAvgRound: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpUrhadd, x, y, instr.Return(), arr) - case ssa.OpcodeVImul: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - m.lowerVIMul(rd, rn, rm, arr) - case ssa.OpcodeVIabs: - m.lowerVecMisc(vecOpAbs, instr) - case ssa.OpcodeVIneg: - m.lowerVecMisc(vecOpNeg, instr) - case ssa.OpcodeVIpopcnt: - m.lowerVecMisc(vecOpCnt, instr) - case ssa.OpcodeVIshl, - ssa.OpcodeVSshr, ssa.OpcodeVUshr: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - m.lowerVShift(op, rd, rn, rm, arr) - case ssa.OpcodeVSqrt: - m.lowerVecMisc(vecOpFsqrt, instr) - case ssa.OpcodeVFabs: - m.lowerVecMisc(vecOpFabs, instr) - case ssa.OpcodeVFneg: - m.lowerVecMisc(vecOpFneg, instr) - case ssa.OpcodeVFmin: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpFmin, x, y, instr.Return(), arr) - case ssa.OpcodeVFmax: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpFmax, x, y, instr.Return(), arr) - case ssa.OpcodeVFadd: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpFadd, x, y, instr.Return(), arr) - case ssa.OpcodeVFsub: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpFsub, x, y, instr.Return(), arr) - case ssa.OpcodeVFmul: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpFmul, x, y, instr.Return(), arr) - case ssa.OpcodeSqmulRoundSat: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpSqrdmulh, x, y, instr.Return(), arr) - case ssa.OpcodeVFdiv: - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - m.lowerVecRRR(vecOpFdiv, x, y, instr.Return(), arr) - case ssa.OpcodeVFcvtToSintSat, ssa.OpcodeVFcvtToUintSat: - x, lane := instr.ArgWithLane() - arr := ssaLaneToArrangement(lane) - rn := 
m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - m.lowerVfpuToInt(rd, rn, arr, op == ssa.OpcodeVFcvtToSintSat) - case ssa.OpcodeVFcvtFromSint, ssa.OpcodeVFcvtFromUint: - x, lane := instr.ArgWithLane() - arr := ssaLaneToArrangement(lane) - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - m.lowerVfpuFromInt(rd, rn, arr, op == ssa.OpcodeVFcvtFromSint) - case ssa.OpcodeSwidenLow, ssa.OpcodeUwidenLow: - x, lane := instr.ArgWithLane() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - - var arr vecArrangement - switch lane { - case ssa.VecLaneI8x16: - arr = vecArrangement8B - case ssa.VecLaneI16x8: - arr = vecArrangement4H - case ssa.VecLaneI32x4: - arr = vecArrangement2S - } - - shll := m.allocateInstr() - if signed := op == ssa.OpcodeSwidenLow; signed { - shll.asVecShiftImm(vecOpSshll, rd, rn, operandShiftImm(0), arr) - } else { - shll.asVecShiftImm(vecOpUshll, rd, rn, operandShiftImm(0), arr) - } - m.insert(shll) - case ssa.OpcodeSwidenHigh, ssa.OpcodeUwidenHigh: - x, lane := instr.ArgWithLane() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - - arr := ssaLaneToArrangement(lane) - - shll := m.allocateInstr() - if signed := op == ssa.OpcodeSwidenHigh; signed { - shll.asVecShiftImm(vecOpSshll, rd, rn, operandShiftImm(0), arr) - } else { - shll.asVecShiftImm(vecOpUshll, rd, rn, operandShiftImm(0), arr) - } - m.insert(shll) - - case ssa.OpcodeSnarrow, ssa.OpcodeUnarrow: - x, y, lane := instr.Arg2WithLane() - var arr, arr2 vecArrangement - switch lane { - case ssa.VecLaneI16x8: // I16x8 - arr = vecArrangement8B - arr2 = vecArrangement16B // Implies sqxtn2. - case ssa.VecLaneI32x4: - arr = vecArrangement4H - arr2 = vecArrangement8H // Implies sqxtn2. - default: - panic("unsupported lane " + lane.String()) - } - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - - tmp := m.compiler.AllocateVReg(ssa.TypeV128) - - loQxtn := m.allocateInstr() - hiQxtn := m.allocateInstr() - if signed := op == ssa.OpcodeSnarrow; signed { - // Narrow lanes on rn and write them into lower-half of rd. - loQxtn.asVecMisc(vecOpSqxtn, tmp, rn, arr) // low - // Narrow lanes on rm and write them into higher-half of rd. - hiQxtn.asVecMisc(vecOpSqxtn, tmp, rm, arr2) // high (sqxtn2) - } else { - // Narrow lanes on rn and write them into lower-half of rd. - loQxtn.asVecMisc(vecOpSqxtun, tmp, rn, arr) // low - // Narrow lanes on rm and write them into higher-half of rd. 
- hiQxtn.asVecMisc(vecOpSqxtun, tmp, rm, arr2) // high (sqxtn2) - } - m.insert(loQxtn) - m.insert(hiQxtn) - - mov := m.allocateInstr() - mov.asFpuMov128(rd, tmp) - m.insert(mov) - case ssa.OpcodeFvpromoteLow: - x, lane := instr.ArgWithLane() - if lane != ssa.VecLaneF32x4 { - panic("unsupported lane type " + lane.String()) - } - ins := m.allocateInstr() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - ins.asVecMisc(vecOpFcvtl, rd, rn, vecArrangement2S) - m.insert(ins) - case ssa.OpcodeFvdemote: - x, lane := instr.ArgWithLane() - if lane != ssa.VecLaneF64x2 { - panic("unsupported lane type " + lane.String()) - } - ins := m.allocateInstr() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - ins.asVecMisc(vecOpFcvtn, rd, rn, vecArrangement2S) - m.insert(ins) - case ssa.OpcodeExtractlane: - x, index, signed, lane := instr.ExtractlaneData() - - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - - mov := m.allocateInstr() - switch lane { - case ssa.VecLaneI8x16: - mov.asMovFromVec(rd, rn, vecArrangementB, vecIndex(index), signed) - case ssa.VecLaneI16x8: - mov.asMovFromVec(rd, rn, vecArrangementH, vecIndex(index), signed) - case ssa.VecLaneI32x4: - mov.asMovFromVec(rd, rn, vecArrangementS, vecIndex(index), signed) - case ssa.VecLaneI64x2: - mov.asMovFromVec(rd, rn, vecArrangementD, vecIndex(index), signed) - case ssa.VecLaneF32x4: - mov.asVecMovElement(rd, rn, vecArrangementS, vecIndex(0), vecIndex(index)) - case ssa.VecLaneF64x2: - mov.asVecMovElement(rd, rn, vecArrangementD, vecIndex(0), vecIndex(index)) - default: - panic("unsupported lane: " + lane.String()) - } - - m.insert(mov) - - case ssa.OpcodeInsertlane: - x, y, index, lane := instr.InsertlaneData() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - tmpReg := m.compiler.AllocateVReg(ssa.TypeV128) - - // Initially mov rn to tmp. - mov1 := m.allocateInstr() - mov1.asFpuMov128(tmpReg, rn.nr()) - m.insert(mov1) - - // movToVec and vecMovElement do not clear the remaining bits to zero, - // thus, we can mov rm in-place to tmp. - mov2 := m.allocateInstr() - switch lane { - case ssa.VecLaneI8x16: - mov2.asMovToVec(tmpReg, rm, vecArrangementB, vecIndex(index)) - case ssa.VecLaneI16x8: - mov2.asMovToVec(tmpReg, rm, vecArrangementH, vecIndex(index)) - case ssa.VecLaneI32x4: - mov2.asMovToVec(tmpReg, rm, vecArrangementS, vecIndex(index)) - case ssa.VecLaneI64x2: - mov2.asMovToVec(tmpReg, rm, vecArrangementD, vecIndex(index)) - case ssa.VecLaneF32x4: - mov2.asVecMovElement(tmpReg, rm, vecArrangementS, vecIndex(index), vecIndex(0)) - case ssa.VecLaneF64x2: - mov2.asVecMovElement(tmpReg, rm, vecArrangementD, vecIndex(index), vecIndex(0)) - } - m.insert(mov2) - - // Finally mov tmp to rd. 
- mov3 := m.allocateInstr() - mov3.asFpuMov128(rd, tmpReg) - m.insert(mov3) - - case ssa.OpcodeSwizzle: - x, y, lane := instr.Arg2WithLane() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - - arr := ssaLaneToArrangement(lane) - - // tbl <rd>.<arr>, { <rn>.<arr> }, <rm>.<arr> - tbl1 := m.allocateInstr() - tbl1.asVecTbl(1, rd, rn, rm, arr) - m.insert(tbl1) - - case ssa.OpcodeShuffle: - x, y, lane1, lane2 := instr.ShuffleData() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - - m.lowerShuffle(rd, rn, rm, lane1, lane2) - - case ssa.OpcodeSplat: - x, lane := instr.ArgWithLane() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - - dup := m.allocateInstr() - switch lane { - case ssa.VecLaneI8x16: - dup.asVecDup(rd, rn, vecArrangement16B) - case ssa.VecLaneI16x8: - dup.asVecDup(rd, rn, vecArrangement8H) - case ssa.VecLaneI32x4: - dup.asVecDup(rd, rn, vecArrangement4S) - case ssa.VecLaneI64x2: - dup.asVecDup(rd, rn, vecArrangement2D) - case ssa.VecLaneF32x4: - dup.asVecDupElement(rd, rn, vecArrangementS, vecIndex(0)) - case ssa.VecLaneF64x2: - dup.asVecDupElement(rd, rn, vecArrangementD, vecIndex(0)) - } - m.insert(dup) - - case ssa.OpcodeWideningPairwiseDotProductS: - x, y := instr.Arg2() - xx, yy := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone), - m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - tmp, tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)), operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - m.insert(m.allocateInstr().asVecRRR(vecOpSmull, tmp.nr(), xx, yy, vecArrangement8H)) - m.insert(m.allocateInstr().asVecRRR(vecOpSmull2, tmp2.nr(), xx, yy, vecArrangement8H)) - m.insert(m.allocateInstr().asVecRRR(vecOpAddp, tmp.nr(), tmp, tmp2, vecArrangement4S)) - - rd := m.compiler.VRegOf(instr.Return()) - m.insert(m.allocateInstr().asFpuMov128(rd, tmp.nr())) - - case ssa.OpcodeLoadSplat: - ptr, offset, lane := instr.LoadSplatData() - m.lowerLoadSplat(ptr, offset, lane, instr.Return()) - - case ssa.OpcodeAtomicRmw: - m.lowerAtomicRmw(instr) - - case ssa.OpcodeAtomicCas: - m.lowerAtomicCas(instr) - - case ssa.OpcodeAtomicLoad: - m.lowerAtomicLoad(instr) - - case ssa.OpcodeAtomicStore: - m.lowerAtomicStore(instr) - - case ssa.OpcodeFence: - instr := m.allocateInstr() - instr.asDMB() - m.insert(instr) - - default: - panic("TODO: lowering " + op.String()) - } - m.FlushPendingInstructions() -} - -func (m *machine) lowerShuffle(rd regalloc.VReg, rn, rm operand, lane1, lane2 uint64) { - // `tbl2` requires 2 consecutive registers, so we arbitrarily pick v29, v30. - vReg, wReg := v29VReg, v30VReg - - // Initialize v29, v30 to rn, rm. - movv := m.allocateInstr() - movv.asFpuMov128(vReg, rn.nr()) - m.insert(movv) - - movw := m.allocateInstr() - movw.asFpuMov128(wReg, rm.nr()) - m.insert(movw) - - // `lane1`, `lane2` are already encoded as two u64s with the right layout: - // lane1 := lane[7]<<56 | ... | lane[1]<<8 | lane[0] - // lane2 := lane[15]<<56 | ... | lane[9]<<8 | lane[8] - // Thus, we can use loadFpuConst128. 
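// Illustrative sketch, not part of the original file: the byte layout described in the
// comment above, i.e. how the 16 shuffle indices pack little-endian into the two uint64s
// that are then loaded with loadFpuConst128. The helper name packShuffleLanes is
// hypothetical and exists only for this illustration.
func packShuffleLanes(lanes [16]byte) (lane1, lane2 uint64) {
	for i := 0; i < 8; i++ {
		lane1 |= uint64(lanes[i]) << (8 * i)   // lanes[0..7] form the low 8 bytes.
		lane2 |= uint64(lanes[i+8]) << (8 * i) // lanes[8..15] form the high 8 bytes.
	}
	return
}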
- tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - lfc := m.allocateInstr() - lfc.asLoadFpuConst128(tmp.nr(), lane1, lane2) - m.insert(lfc) - - // tbl <rd>.16b, { <vReg>.16B, <wReg>.16b }, <tmp>.16b - tbl2 := m.allocateInstr() - tbl2.asVecTbl(2, rd, operandNR(vReg), tmp, vecArrangement16B) - m.insert(tbl2) -} - -func (m *machine) lowerVShift(op ssa.Opcode, rd regalloc.VReg, rn, rm operand, arr vecArrangement) { - var modulo byte - switch arr { - case vecArrangement16B: - modulo = 0x7 // Modulo 8. - case vecArrangement8H: - modulo = 0xf // Modulo 16. - case vecArrangement4S: - modulo = 0x1f // Modulo 32. - case vecArrangement2D: - modulo = 0x3f // Modulo 64. - default: - panic("unsupported arrangment " + arr.String()) - } - - rtmp := operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) - vtmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - - and := m.allocateInstr() - and.asALUBitmaskImm(aluOpAnd, rtmp.nr(), rm.nr(), uint64(modulo), true) - m.insert(and) - - if op != ssa.OpcodeVIshl { - // Negate the amount to make this as right shift. - neg := m.allocateInstr() - neg.asALU(aluOpSub, rtmp.nr(), operandNR(xzrVReg), rtmp, true) - m.insert(neg) - } - - // Copy the shift amount into a vector register as sshl/ushl requires it to be there. - dup := m.allocateInstr() - dup.asVecDup(vtmp.nr(), rtmp, arr) - m.insert(dup) - - if op == ssa.OpcodeVIshl || op == ssa.OpcodeVSshr { - sshl := m.allocateInstr() - sshl.asVecRRR(vecOpSshl, rd, rn, vtmp, arr) - m.insert(sshl) - } else { - ushl := m.allocateInstr() - ushl.asVecRRR(vecOpUshl, rd, rn, vtmp, arr) - m.insert(ushl) - } -} - -func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm operand, rd regalloc.VReg, arr vecArrangement) { - tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - - // Special case VallTrue for i64x2. - if op == ssa.OpcodeVallTrue && arr == vecArrangement2D { - // cmeq v3?.2d, v2?.2d, #0 - // addp v3?.2d, v3?.2d, v3?.2d - // fcmp v3?, v3? - // cset dst, eq - - ins := m.allocateInstr() - ins.asVecMisc(vecOpCmeq0, tmp.nr(), rm, vecArrangement2D) - m.insert(ins) - - addp := m.allocateInstr() - addp.asVecRRR(vecOpAddp, tmp.nr(), tmp, tmp, vecArrangement2D) - m.insert(addp) - - fcmp := m.allocateInstr() - fcmp.asFpuCmp(tmp, tmp, true) - m.insert(fcmp) - - cset := m.allocateInstr() - cset.asCSet(rd, false, eq) - m.insert(cset) - - return - } - - // Create a scalar value with umaxp or uminv, then compare it against zero. - ins := m.allocateInstr() - if op == ssa.OpcodeVanyTrue { - // umaxp v4?.16b, v2?.16b, v2?.16b - ins.asVecRRR(vecOpUmaxp, tmp.nr(), rm, rm, vecArrangement16B) - } else { - // uminv d4?, v2?.4s - ins.asVecLanes(vecOpUminv, tmp.nr(), rm, arr) - } - m.insert(ins) - - // mov x3?, v4?.d[0] - // ccmp x3?, #0x0, #0x0, al - // cset x3?, ne - // mov x0, x3? - - movv := m.allocateInstr() - movv.asMovFromVec(rd, tmp, vecArrangementD, vecIndex(0), false) - m.insert(movv) - - fc := m.allocateInstr() - fc.asCCmpImm(operandNR(rd), uint64(0), al, 0, true) - m.insert(fc) - - cset := m.allocateInstr() - cset.asCSet(rd, false, ne) - m.insert(cset) -} - -func (m *machine) lowerVhighBits(rm operand, rd regalloc.VReg, arr vecArrangement) { - r0 := operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) - v0 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - v1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - - switch arr { - case vecArrangement16B: - // sshr v6?.16b, v2?.16b, #7 - // movz x4?, #0x201, lsl 0 - // movk x4?, #0x804, lsl 16 - // movk x4?, #0x2010, lsl 32 - // movk x4?, #0x8040, lsl 48 - // dup v5?.2d, x4? 
- // and v6?.16b, v6?.16b, v5?.16b - // ext v5?.16b, v6?.16b, v6?.16b, #8 - // zip1 v5?.16b, v6?.16b, v5?.16b - // addv s5?, v5?.8h - // umov s3?, v5?.h[0] - - // Right arithmetic shift on the original vector and store the result into v1. So we have: - // v1[i] = 0xff if vi<0, 0 otherwise. - sshr := m.allocateInstr() - sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(7), vecArrangement16B) - m.insert(sshr) - - // Load the bit mask into r0. - m.insertMOVZ(r0.nr(), 0x0201, 0, true) - m.insertMOVK(r0.nr(), 0x0804, 1, true) - m.insertMOVK(r0.nr(), 0x2010, 2, true) - m.insertMOVK(r0.nr(), 0x8040, 3, true) - - // dup r0 to v0. - dup := m.allocateInstr() - dup.asVecDup(v0.nr(), r0, vecArrangement2D) - m.insert(dup) - - // Lane-wise logical AND with the bit mask, meaning that we have - // v[i] = (1 << i) if vi<0, 0 otherwise. - // - // Below, we use the following notation: - // wi := (1 << i) if vi<0, 0 otherwise. - and := m.allocateInstr() - and.asVecRRR(vecOpAnd, v1.nr(), v1, v0, vecArrangement16B) - m.insert(and) - - // Swap the lower and higher 8 byte elements, and write it into v0, meaning that we have - // v0[i] = w(i+8) if i < 8, w(i-8) otherwise. - ext := m.allocateInstr() - ext.asVecExtract(v0.nr(), v1, v1, vecArrangement16B, uint32(8)) - m.insert(ext) - - // v = [w0, w8, ..., w7, w15] - zip1 := m.allocateInstr() - zip1.asVecPermute(vecOpZip1, v0.nr(), v1, v0, vecArrangement16B) - m.insert(zip1) - - // v.h[0] = w0 + ... + w15 - addv := m.allocateInstr() - addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement8H) - m.insert(addv) - - // Extract the v.h[0] as the result. - movfv := m.allocateInstr() - movfv.asMovFromVec(rd, v0, vecArrangementH, vecIndex(0), false) - m.insert(movfv) - case vecArrangement8H: - // sshr v6?.8h, v2?.8h, #15 - // movz x4?, #0x1, lsl 0 - // movk x4?, #0x2, lsl 16 - // movk x4?, #0x4, lsl 32 - // movk x4?, #0x8, lsl 48 - // dup v5?.2d, x4? - // lsl x4?, x4?, 0x4 - // ins v5?.d[1], x4? - // and v5?.16b, v6?.16b, v5?.16b - // addv s5?, v5?.8h - // umov s3?, v5?.h[0] - - // Right arithmetic shift on the original vector and store the result into v1. So we have: - // v[i] = 0xffff if vi<0, 0 otherwise. - sshr := m.allocateInstr() - sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(15), vecArrangement8H) - m.insert(sshr) - - // Load the bit mask into r0. - m.lowerConstantI64(r0.nr(), 0x0008000400020001) - - // dup r0 to vector v0. - dup := m.allocateInstr() - dup.asVecDup(v0.nr(), r0, vecArrangement2D) - m.insert(dup) - - lsl := m.allocateInstr() - lsl.asALUShift(aluOpLsl, r0.nr(), r0, operandShiftImm(4), true) - m.insert(lsl) - - movv := m.allocateInstr() - movv.asMovToVec(v0.nr(), r0, vecArrangementD, vecIndex(1)) - m.insert(movv) - - // Lane-wise logical AND with the bitmask, meaning that we have - // v[i] = (1 << i) if vi<0, 0 otherwise for i=0..3 - // = (1 << (i+4)) if vi<0, 0 otherwise for i=3..7 - and := m.allocateInstr() - and.asVecRRR(vecOpAnd, v0.nr(), v1, v0, vecArrangement16B) - m.insert(and) - - addv := m.allocateInstr() - addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement8H) - m.insert(addv) - - movfv := m.allocateInstr() - movfv.asMovFromVec(rd, v0, vecArrangementH, vecIndex(0), false) - m.insert(movfv) - case vecArrangement4S: - // sshr v6?.8h, v2?.8h, #15 - // movz x4?, #0x1, lsl 0 - // movk x4?, #0x2, lsl 16 - // movk x4?, #0x4, lsl 32 - // movk x4?, #0x8, lsl 48 - // dup v5?.2d, x4? - // lsl x4?, x4?, 0x4 - // ins v5?.d[1], x4? 
- // and v5?.16b, v6?.16b, v5?.16b - // addv s5?, v5?.8h - // umov s3?, v5?.h[0] - - // Right arithmetic shift on the original vector and store the result into v1. So we have: - // v[i] = 0xffffffff if vi<0, 0 otherwise. - sshr := m.allocateInstr() - sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(31), vecArrangement4S) - m.insert(sshr) - - // Load the bit mask into r0. - m.lowerConstantI64(r0.nr(), 0x0000000200000001) - - // dup r0 to vector v0. - dup := m.allocateInstr() - dup.asVecDup(v0.nr(), r0, vecArrangement2D) - m.insert(dup) - - lsl := m.allocateInstr() - lsl.asALUShift(aluOpLsl, r0.nr(), r0, operandShiftImm(2), true) - m.insert(lsl) - - movv := m.allocateInstr() - movv.asMovToVec(v0.nr(), r0, vecArrangementD, vecIndex(1)) - m.insert(movv) - - // Lane-wise logical AND with the bitmask, meaning that we have - // v[i] = (1 << i) if vi<0, 0 otherwise for i in [0, 1] - // = (1 << (i+4)) if vi<0, 0 otherwise for i in [2, 3] - and := m.allocateInstr() - and.asVecRRR(vecOpAnd, v0.nr(), v1, v0, vecArrangement16B) - m.insert(and) - - addv := m.allocateInstr() - addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement4S) - m.insert(addv) - - movfv := m.allocateInstr() - movfv.asMovFromVec(rd, v0, vecArrangementS, vecIndex(0), false) - m.insert(movfv) - case vecArrangement2D: - // mov d3?, v2?.d[0] - // mov x4?, v2?.d[1] - // lsr x4?, x4?, 0x3f - // lsr d3?, d3?, 0x3f - // add s3?, s3?, w4?, lsl #1 - - // Move the lower 64-bit int into result. - movv0 := m.allocateInstr() - movv0.asMovFromVec(rd, rm, vecArrangementD, vecIndex(0), false) - m.insert(movv0) - - // Move the higher 64-bit int into r0. - movv1 := m.allocateInstr() - movv1.asMovFromVec(r0.nr(), rm, vecArrangementD, vecIndex(1), false) - m.insert(movv1) - - // Move the sign bit into the least significant bit. 
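// Illustrative sketch, not part of the original file: for the i64x2 arrangement the result
// is just the two lane sign bits packed into bits 0 and 1, which is what the lsr/lsr/add
// (with lsl #1) sequence below computes. The helper name i64x2HighBits is hypothetical.
func i64x2HighBits(lane0, lane1 uint64) uint64 {
	return (lane0 >> 63) | (lane1>>63)<<1
}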
- lsr1 := m.allocateInstr() - lsr1.asALUShift(aluOpLsr, r0.nr(), r0, operandShiftImm(63), true) - m.insert(lsr1) - - lsr2 := m.allocateInstr() - lsr2.asALUShift(aluOpLsr, rd, operandNR(rd), operandShiftImm(63), true) - m.insert(lsr2) - - // rd = (r0<<1) | rd - lsl := m.allocateInstr() - lsl.asALU(aluOpAdd, rd, operandNR(rd), operandSR(r0.nr(), 1, shiftOpLSL), false) - m.insert(lsl) - default: - panic("Unsupported " + arr.String()) - } -} - -func (m *machine) lowerVecMisc(op vecOp, instr *ssa.Instruction) { - x, lane := instr.ArgWithLane() - arr := ssaLaneToArrangement(lane) - ins := m.allocateInstr() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - ins.asVecMisc(op, rd, rn, arr) - m.insert(ins) -} - -func (m *machine) lowerVecRRR(op vecOp, x, y, ret ssa.Value, arr vecArrangement) { - ins := m.allocateInstr() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := m.compiler.VRegOf(ret) - ins.asVecRRR(op, rd, rn, rm, arr) - m.insert(ins) -} - -func (m *machine) lowerVIMul(rd regalloc.VReg, rn, rm operand, arr vecArrangement) { - if arr != vecArrangement2D { - mul := m.allocateInstr() - mul.asVecRRR(vecOpMul, rd, rn, rm, arr) - m.insert(mul) - } else { - tmp1 := m.compiler.AllocateVReg(ssa.TypeV128) - tmp2 := m.compiler.AllocateVReg(ssa.TypeV128) - tmp3 := m.compiler.AllocateVReg(ssa.TypeV128) - - tmpRes := m.compiler.AllocateVReg(ssa.TypeV128) - - // Following the algorithm in https://chromium-review.googlesource.com/c/v8/v8/+/1781696 - rev64 := m.allocateInstr() - rev64.asVecMisc(vecOpRev64, tmp2, rm, vecArrangement4S) - m.insert(rev64) - - mul := m.allocateInstr() - mul.asVecRRR(vecOpMul, tmp2, operandNR(tmp2), rn, vecArrangement4S) - m.insert(mul) - - xtn1 := m.allocateInstr() - xtn1.asVecMisc(vecOpXtn, tmp1, rn, vecArrangement2S) - m.insert(xtn1) - - addp := m.allocateInstr() - addp.asVecRRR(vecOpAddp, tmp2, operandNR(tmp2), operandNR(tmp2), vecArrangement4S) - m.insert(addp) - - xtn2 := m.allocateInstr() - xtn2.asVecMisc(vecOpXtn, tmp3, rm, vecArrangement2S) - m.insert(xtn2) - - // Note: do not write the result directly into result yet. This is the same reason as in bsl. - // In short, in UMLAL instruction, the result register is also one of the source register, and - // the value on the result register is significant. - shll := m.allocateInstr() - shll.asVecMisc(vecOpShll, tmpRes, operandNR(tmp2), vecArrangement2S) - m.insert(shll) - - umlal := m.allocateInstr() - umlal.asVecRRRRewrite(vecOpUmlal, tmpRes, operandNR(tmp3), operandNR(tmp1), vecArrangement2S) - m.insert(umlal) - - mov := m.allocateInstr() - mov.asFpuMov128(rd, tmpRes) - m.insert(mov) - } -} - -func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) { - x, y, lane := instr.Arg2WithLane() - arr := ssaLaneToArrangement(lane) - - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - - // Note: this usage of tmp is important. - // BSL modifies the destination register, so we need to use a temporary register so that - // the actual definition of the destination register happens *after* the BSL instruction. - // That way, we can force the spill instruction to be inserted after the BSL instruction. - tmp := m.compiler.AllocateVReg(ssa.TypeV128) - - fcmgt := m.allocateInstr() - if max { - fcmgt.asVecRRR(vecOpFcmgt, tmp, rm, rn, arr) - } else { - // If min, swap the args. 
- fcmgt.asVecRRR(vecOpFcmgt, tmp, rn, rm, arr) - } - m.insert(fcmgt) - - bsl := m.allocateInstr() - bsl.asVecRRRRewrite(vecOpBsl, tmp, rm, rn, vecArrangement16B) - m.insert(bsl) - - res := operandNR(m.compiler.VRegOf(instr.Return())) - mov2 := m.allocateInstr() - mov2.asFpuMov128(res.nr(), tmp) - m.insert(mov2) -} - -func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn regalloc.VReg, rm operand, _64bit, signed bool) { - div := m.allocateInstr() - - if signed { - div.asALU(aluOpSDiv, rd, operandNR(rn), rm, _64bit) - } else { - div.asALU(aluOpUDiv, rd, operandNR(rn), rm, _64bit) - } - m.insert(div) - - // Check if rm is zero: - m.exitIfNot(execCtxVReg, registerAsRegNotZeroCond(rm.nr()), _64bit, wazevoapi.ExitCodeIntegerDivisionByZero) - - // rd = rn-rd*rm by MSUB instruction. - msub := m.allocateInstr() - msub.asALURRRR(aluOpMSub, rd, operandNR(rd), rm, rn, _64bit) - m.insert(msub) -} - -func (m *machine) lowerIDiv(execCtxVReg, rd regalloc.VReg, rn, rm operand, _64bit, signed bool) { - div := m.allocateInstr() - - if signed { - div.asALU(aluOpSDiv, rd, rn, rm, _64bit) - } else { - div.asALU(aluOpUDiv, rd, rn, rm, _64bit) - } - m.insert(div) - - // Check if rm is zero: - m.exitIfNot(execCtxVReg, registerAsRegNotZeroCond(rm.nr()), _64bit, wazevoapi.ExitCodeIntegerDivisionByZero) - - if signed { - // We need to check the signed overflow which happens iff "math.MinInt{32,64} / -1" - minusOneCheck := m.allocateInstr() - // Sets eq condition if rm == -1. - minusOneCheck.asALU(aluOpAddS, xzrVReg, rm, operandImm12(1, 0), _64bit) - m.insert(minusOneCheck) - - ccmp := m.allocateInstr() - // If eq condition is set, sets the flag by the result based on "rn - 1", otherwise clears the flag. - ccmp.asCCmpImm(rn, 1, eq, 0, _64bit) - m.insert(ccmp) - - // Check the overflow flag. - m.exitIfNot(execCtxVReg, vs.invert().asCond(), false, wazevoapi.ExitCodeIntegerOverflow) - } -} - -// exitIfNot emits a conditional branch to exit if the condition is not met. -// If `c` (cond type) is a register, `cond64bit` must be chosen to indicate whether the register is 32-bit or 64-bit. -// Otherwise, `cond64bit` is ignored. -func (m *machine) exitIfNot(execCtxVReg regalloc.VReg, c cond, cond64bit bool, code wazevoapi.ExitCode) { - execCtxTmp := m.copyToTmp(execCtxVReg) - - cbr := m.allocateInstr() - m.insert(cbr) - m.lowerExitWithCode(execCtxTmp, code) - // Conditional branch target is after exit. 
- l := m.insertBrTargetLabel() - cbr.asCondBr(c, l, cond64bit) -} - -func (m *machine) lowerFcopysign(x, y, ret ssa.Value) { - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - var tmpI, tmpF regalloc.VReg - _64 := x.Type() == ssa.TypeF64 - if _64 { - tmpF = m.compiler.AllocateVReg(ssa.TypeF64) - tmpI = m.compiler.AllocateVReg(ssa.TypeI64) - } else { - tmpF = m.compiler.AllocateVReg(ssa.TypeF32) - tmpI = m.compiler.AllocateVReg(ssa.TypeI32) - } - rd := m.compiler.VRegOf(ret) - m.lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF, _64) -} - -func (m *machine) lowerFcopysignImpl(rd regalloc.VReg, rn, rm operand, tmpI, tmpF regalloc.VReg, _64bit bool) { - // This is exactly the same code emitted by GCC for "__builtin_copysign": - // - // mov x0, -9223372036854775808 - // fmov d2, x0 - // vbit v0.8b, v1.8b, v2.8b - // - - setMSB := m.allocateInstr() - if _64bit { - m.lowerConstantI64(tmpI, math.MinInt64) - setMSB.asMovToVec(tmpF, operandNR(tmpI), vecArrangementD, vecIndex(0)) - } else { - m.lowerConstantI32(tmpI, math.MinInt32) - setMSB.asMovToVec(tmpF, operandNR(tmpI), vecArrangementS, vecIndex(0)) - } - m.insert(setMSB) - - tmpReg := m.compiler.AllocateVReg(ssa.TypeF64) - - mov := m.allocateInstr() - mov.asFpuMov64(tmpReg, rn.nr()) - m.insert(mov) - - vbit := m.allocateInstr() - vbit.asVecRRRRewrite(vecOpBit, tmpReg, rm, operandNR(tmpF), vecArrangement8B) - m.insert(vbit) - - movDst := m.allocateInstr() - movDst.asFpuMov64(rd, tmpReg) - m.insert(movDst) -} - -func (m *machine) lowerBitcast(instr *ssa.Instruction) { - v, dstType := instr.BitcastData() - srcType := v.Type() - rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) - rd := m.compiler.VRegOf(instr.Return()) - srcInt := srcType.IsInt() - dstInt := dstType.IsInt() - switch { - case srcInt && !dstInt: // Int to Float: - mov := m.allocateInstr() - var arr vecArrangement - if srcType.Bits() == 64 { - arr = vecArrangementD - } else { - arr = vecArrangementS - } - mov.asMovToVec(rd, rn, arr, vecIndex(0)) - m.insert(mov) - case !srcInt && dstInt: // Float to Int: - mov := m.allocateInstr() - var arr vecArrangement - if dstType.Bits() == 64 { - arr = vecArrangementD - } else { - arr = vecArrangementS - } - mov.asMovFromVec(rd, rn, arr, vecIndex(0), false) - m.insert(mov) - default: - panic("TODO?BUG?") - } -} - -func (m *machine) lowerFpuUniOp(op fpuUniOp, in, out ssa.Value) { - rn := m.getOperand_NR(m.compiler.ValueDefinition(in), extModeNone) - rd := m.compiler.VRegOf(out) - - neg := m.allocateInstr() - neg.asFpuRR(op, rd, rn, in.Type().Bits() == 64) - m.insert(neg) -} - -func (m *machine) lowerFpuToInt(rd regalloc.VReg, rn operand, ctx regalloc.VReg, signed, src64bit, dst64bit, nonTrapping bool) { - if !nonTrapping { - // First of all, we have to clear the FPU flags. - flagClear := m.allocateInstr() - flagClear.asMovToFPSR(xzrVReg) - m.insert(flagClear) - } - - // Then, do the conversion which doesn't trap inherently. - cvt := m.allocateInstr() - cvt.asFpuToInt(rd, rn, signed, src64bit, dst64bit) - m.insert(cvt) - - if !nonTrapping { - tmpReg := m.compiler.AllocateVReg(ssa.TypeI64) - - // After the conversion, check the FPU flags. - getFlag := m.allocateInstr() - getFlag.asMovFromFPSR(tmpReg) - m.insert(getFlag) - - execCtx := m.copyToTmp(ctx) - _rn := operandNR(m.copyToTmp(rn.nr())) - - // Check if the conversion was undefined by comparing the status with 1. 
- // See https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/FPSR--Floating-point-Status-Register - alu := m.allocateInstr() - alu.asALU(aluOpSubS, xzrVReg, operandNR(tmpReg), operandImm12(1, 0), true) - m.insert(alu) - - // If it is not undefined, we can return the result. - ok := m.allocateInstr() - m.insert(ok) - - // Otherwise, we have to choose the status depending on whether it is an overflow or a NaN conversion. - - // Compare the value against itself to check whether it is a NaN. - fpuCmp := m.allocateInstr() - fpuCmp.asFpuCmp(_rn, _rn, src64bit) - m.insert(fpuCmp) - // If the VC flag is not set (== VS flag is set), it is a NaN. - m.exitIfNot(execCtx, vc.asCond(), false, wazevoapi.ExitCodeInvalidConversionToInteger) - // Otherwise, it is an overflow. - m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerOverflow) - - // Conditional branch target is after exit. - l := m.insertBrTargetLabel() - ok.asCondBr(ne.asCond(), l, false /* ignored */) - } -} - -func (m *machine) lowerIntToFpu(rd regalloc.VReg, rn operand, signed, src64bit, dst64bit bool) { - cvt := m.allocateInstr() - cvt.asIntToFpu(rd, rn, signed, src64bit, dst64bit) - m.insert(cvt) -} - -func (m *machine) lowerFpuBinOp(si *ssa.Instruction) { - instr := m.allocateInstr() - var op fpuBinOp - switch si.Opcode() { - case ssa.OpcodeFadd: - op = fpuBinOpAdd - case ssa.OpcodeFsub: - op = fpuBinOpSub - case ssa.OpcodeFmul: - op = fpuBinOpMul - case ssa.OpcodeFdiv: - op = fpuBinOpDiv - case ssa.OpcodeFmax: - op = fpuBinOpMax - case ssa.OpcodeFmin: - op = fpuBinOpMin - } - x, y := si.Arg2() - xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y) - rn := m.getOperand_NR(xDef, extModeNone) - rm := m.getOperand_NR(yDef, extModeNone) - rd := m.compiler.VRegOf(si.Return()) - instr.asFpuRRR(op, rd, rn, rm, x.Type().Bits() == 64) - m.insert(instr) -} - -func (m *machine) lowerSubOrAdd(si *ssa.Instruction, add bool) { - x, y := si.Arg2() - if !x.Type().IsInt() { - panic("BUG?") - } - - xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y) - rn := m.getOperand_NR(xDef, extModeNone) - rm, yNegated := m.getOperand_MaybeNegatedImm12_ER_SR_NR(yDef, extModeNone) - - var aop aluOp - switch { - case add && !yNegated: // rn+rm = x+y - aop = aluOpAdd - case add && yNegated: // rn-rm = x-(-y) = x+y - aop = aluOpSub - case !add && !yNegated: // rn-rm = x-y - aop = aluOpSub - case !add && yNegated: // rn+rm = x+(-y) = x-y - aop = aluOpAdd - } - rd := m.compiler.VRegOf(si.Return()) - alu := m.allocateInstr() - alu.asALU(aop, rd, rn, rm, x.Type().Bits() == 64) - m.insert(alu) -} - -// InsertMove implements backend.Machine. 
-func (m *machine) InsertMove(dst, src regalloc.VReg, typ ssa.Type) { - instr := m.allocateInstr() - switch typ { - case ssa.TypeI32, ssa.TypeI64: - instr.asMove64(dst, src) - case ssa.TypeF32, ssa.TypeF64: - instr.asFpuMov64(dst, src) - case ssa.TypeV128: - instr.asFpuMov128(dst, src) - default: - panic("TODO") - } - m.insert(instr) -} - -func (m *machine) lowerIcmp(si *ssa.Instruction) { - x, y, c := si.IcmpData() - flag := condFlagFromSSAIntegerCmpCond(c) - - in64bit := x.Type().Bits() == 64 - var ext extMode - if in64bit { - if c.Signed() { - ext = extModeSignExtend64 - } else { - ext = extModeZeroExtend64 - } - } else { - if c.Signed() { - ext = extModeSignExtend32 - } else { - ext = extModeZeroExtend32 - } - } - - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext) - rm := m.getOperand_Imm12_ER_SR_NR(m.compiler.ValueDefinition(y), ext) - alu := m.allocateInstr() - alu.asALU(aluOpSubS, xzrVReg, rn, rm, in64bit) - m.insert(alu) - - cset := m.allocateInstr() - cset.asCSet(m.compiler.VRegOf(si.Return()), false, flag) - m.insert(cset) -} - -func (m *machine) lowerVIcmp(si *ssa.Instruction) { - x, y, c, lane := si.VIcmpData() - flag := condFlagFromSSAIntegerCmpCond(c) - arr := ssaLaneToArrangement(lane) - - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := m.compiler.VRegOf(si.Return()) - - switch flag { - case eq: - cmp := m.allocateInstr() - cmp.asVecRRR(vecOpCmeq, rd, rn, rm, arr) - m.insert(cmp) - case ne: - cmp := m.allocateInstr() - cmp.asVecRRR(vecOpCmeq, rd, rn, rm, arr) - m.insert(cmp) - not := m.allocateInstr() - not.asVecMisc(vecOpNot, rd, operandNR(rd), vecArrangement16B) - m.insert(not) - case ge: - cmp := m.allocateInstr() - cmp.asVecRRR(vecOpCmge, rd, rn, rm, arr) - m.insert(cmp) - case gt: - cmp := m.allocateInstr() - cmp.asVecRRR(vecOpCmgt, rd, rn, rm, arr) - m.insert(cmp) - case le: - cmp := m.allocateInstr() - cmp.asVecRRR(vecOpCmge, rd, rm, rn, arr) // rm, rn are swapped - m.insert(cmp) - case lt: - cmp := m.allocateInstr() - cmp.asVecRRR(vecOpCmgt, rd, rm, rn, arr) // rm, rn are swapped - m.insert(cmp) - case hs: - cmp := m.allocateInstr() - cmp.asVecRRR(vecOpCmhs, rd, rn, rm, arr) - m.insert(cmp) - case hi: - cmp := m.allocateInstr() - cmp.asVecRRR(vecOpCmhi, rd, rn, rm, arr) - m.insert(cmp) - case ls: - cmp := m.allocateInstr() - cmp.asVecRRR(vecOpCmhs, rd, rm, rn, arr) // rm, rn are swapped - m.insert(cmp) - case lo: - cmp := m.allocateInstr() - cmp.asVecRRR(vecOpCmhi, rd, rm, rn, arr) // rm, rn are swapped - m.insert(cmp) - } -} - -func (m *machine) lowerVFcmp(si *ssa.Instruction) { - x, y, c, lane := si.VFcmpData() - flag := condFlagFromSSAFloatCmpCond(c) - arr := ssaLaneToArrangement(lane) - - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := m.compiler.VRegOf(si.Return()) - - switch flag { - case eq: - cmp := m.allocateInstr() - cmp.asVecRRR(vecOpFcmeq, rd, rn, rm, arr) - m.insert(cmp) - case ne: - cmp := m.allocateInstr() - cmp.asVecRRR(vecOpFcmeq, rd, rn, rm, arr) - m.insert(cmp) - not := m.allocateInstr() - not.asVecMisc(vecOpNot, rd, operandNR(rd), vecArrangement16B) - m.insert(not) - case ge: - cmp := m.allocateInstr() - cmp.asVecRRR(vecOpFcmge, rd, rn, rm, arr) - m.insert(cmp) - case gt: - cmp := m.allocateInstr() - cmp.asVecRRR(vecOpFcmgt, rd, rn, rm, arr) - m.insert(cmp) - case mi: - cmp := m.allocateInstr() - cmp.asVecRRR(vecOpFcmgt, rd, rm, rn, arr) // rm, rn are 
swapped - m.insert(cmp) - case ls: - cmp := m.allocateInstr() - cmp.asVecRRR(vecOpFcmge, rd, rm, rn, arr) // rm, rn are swapped - m.insert(cmp) - } -} - -func (m *machine) lowerVfpuToInt(rd regalloc.VReg, rn operand, arr vecArrangement, signed bool) { - cvt := m.allocateInstr() - if signed { - cvt.asVecMisc(vecOpFcvtzs, rd, rn, arr) - } else { - cvt.asVecMisc(vecOpFcvtzu, rd, rn, arr) - } - m.insert(cvt) - - if arr == vecArrangement2D { - narrow := m.allocateInstr() - if signed { - narrow.asVecMisc(vecOpSqxtn, rd, operandNR(rd), vecArrangement2S) - } else { - narrow.asVecMisc(vecOpUqxtn, rd, operandNR(rd), vecArrangement2S) - } - m.insert(narrow) - } -} - -func (m *machine) lowerVfpuFromInt(rd regalloc.VReg, rn operand, arr vecArrangement, signed bool) { - cvt := m.allocateInstr() - if signed { - cvt.asVecMisc(vecOpScvtf, rd, rn, arr) - } else { - cvt.asVecMisc(vecOpUcvtf, rd, rn, arr) - } - m.insert(cvt) -} - -func (m *machine) lowerShifts(si *ssa.Instruction, ext extMode, aluOp aluOp) { - x, amount := si.Arg2() - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext) - rm := m.getOperand_ShiftImm_NR(m.compiler.ValueDefinition(amount), ext, x.Type().Bits()) - rd := m.compiler.VRegOf(si.Return()) - - alu := m.allocateInstr() - alu.asALUShift(aluOp, rd, rn, rm, x.Type().Bits() == 64) - m.insert(alu) -} - -func (m *machine) lowerBitwiseAluOp(si *ssa.Instruction, op aluOp, ignoreResult bool) { - x, y := si.Arg2() - - xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y) - rn := m.getOperand_NR(xDef, extModeNone) - - var rd regalloc.VReg - if ignoreResult { - rd = xzrVReg - } else { - rd = m.compiler.VRegOf(si.Return()) - } - - _64 := x.Type().Bits() == 64 - alu := m.allocateInstr() - if instr := yDef.Instr; instr != nil && instr.Constant() { - c := instr.ConstantVal() - if isBitMaskImmediate(c, _64) { - // Constant bit wise operations can be lowered to a single instruction. - alu.asALUBitmaskImm(op, rd, rn.nr(), c, _64) - m.insert(alu) - return - } - } - - rm := m.getOperand_SR_NR(yDef, extModeNone) - alu.asALU(op, rd, rn, rm, _64) - m.insert(alu) -} - -func (m *machine) lowerRotl(si *ssa.Instruction) { - x, y := si.Arg2() - r := si.Return() - _64 := r.Type().Bits() == 64 - - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - var tmp regalloc.VReg - if _64 { - tmp = m.compiler.AllocateVReg(ssa.TypeI64) - } else { - tmp = m.compiler.AllocateVReg(ssa.TypeI32) - } - rd := m.compiler.VRegOf(r) - - // Encode rotl as neg + rotr: neg is a sub against the zero-reg. - m.lowerRotlImpl(rd, rn, rm, tmp, _64) -} - -func (m *machine) lowerRotlImpl(rd regalloc.VReg, rn, rm operand, tmp regalloc.VReg, is64bit bool) { - // Encode rotl as neg + rotr: neg is a sub against the zero-reg. 
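	// Editor's note (not part of the original file): AArch64 has no rotate-left
	// instruction, only ROR, and the register form of ROR takes the amount modulo
	// the register width. Negating the amount therefore turns a left rotation into
	// the equivalent right rotation:
	//
	//	rotl(x, n) == rotr(x, (width - n) mod width) == rotr(x, (0 - n) mod width)
	//
	// e.g. for width 64 and n = 13: 0 - 13 wraps to 0xffff_ffff_ffff_fff3, whose
	// low 6 bits are 51 = 64 - 13, exactly the ROR amount we want.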
- neg := m.allocateInstr() - neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rm, is64bit) - m.insert(neg) - alu := m.allocateInstr() - alu.asALU(aluOpRotR, rd, rn, operandNR(tmp), is64bit) - m.insert(alu) -} - -func (m *machine) lowerRotr(si *ssa.Instruction) { - x, y := si.Arg2() - - xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y) - rn := m.getOperand_NR(xDef, extModeNone) - rm := m.getOperand_NR(yDef, extModeNone) - rd := m.compiler.VRegOf(si.Return()) - - alu := m.allocateInstr() - alu.asALU(aluOpRotR, rd, rn, rm, si.Return().Type().Bits() == 64) - m.insert(alu) -} - -func (m *machine) lowerExtend(arg, ret ssa.Value, from, to byte, signed bool) { - rd := m.compiler.VRegOf(ret) - def := m.compiler.ValueDefinition(arg) - - if instr := def.Instr; !signed && from == 32 && instr != nil { - // We can optimize out the unsigned extend because: - // Writes to the W register set bits [63:32] of the X register to zero - // https://developer.arm.com/documentation/den0024/a/An-Introduction-to-the-ARMv8-Instruction-Sets/The-ARMv8-instruction-sets/Distinguishing-between-32-bit-and-64-bit-A64-instructions - switch instr.Opcode() { - case - ssa.OpcodeIadd, ssa.OpcodeIsub, ssa.OpcodeLoad, - ssa.OpcodeBand, ssa.OpcodeBor, ssa.OpcodeBnot, - ssa.OpcodeIshl, ssa.OpcodeUshr, ssa.OpcodeSshr, - ssa.OpcodeRotl, ssa.OpcodeRotr, - ssa.OpcodeUload8, ssa.OpcodeUload16, ssa.OpcodeUload32: - // So, if the argument is the result of a 32-bit operation, we can just copy the register. - // It is highly likely that this copy will be optimized out after register allocation. - rn := m.compiler.VRegOf(arg) - mov := m.allocateInstr() - // Note: do not use move32 as it will be lowered to a 32-bit move, which is not copy (that is actually the impl of UExtend). - mov.asMove64(rd, rn) - m.insert(mov) - return - default: - } - } - rn := m.getOperand_NR(def, extModeNone) - - ext := m.allocateInstr() - ext.asExtend(rd, rn.nr(), from, to, signed) - m.insert(ext) -} - -func (m *machine) lowerFcmp(x, y, result ssa.Value, c ssa.FloatCmpCond) { - rn, rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone), m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - - fc := m.allocateInstr() - fc.asFpuCmp(rn, rm, x.Type().Bits() == 64) - m.insert(fc) - - cset := m.allocateInstr() - cset.asCSet(m.compiler.VRegOf(result), false, condFlagFromSSAFloatCmpCond(c)) - m.insert(cset) -} - -func (m *machine) lowerImul(x, y, result ssa.Value) { - rd := m.compiler.VRegOf(result) - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - - // TODO: if this comes before Add/Sub, we could merge it by putting it into the place of xzrVReg. 
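	// Editor's note (not part of the original file): plain integer multiply is
	// emitted as MADD rd, rn, rm, xzr below; on AArch64 the MUL mnemonic is just
	// an alias for MADD with the zero register as the addend, which is why the
	// TODO above talks about reusing that addend slot to fuse a following add.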
- - mul := m.allocateInstr() - mul.asALURRRR(aluOpMAdd, rd, rn, rm, xzrVReg, x.Type().Bits() == 64) - m.insert(mul) -} - -func (m *machine) lowerClz(x, result ssa.Value) { - rd := m.compiler.VRegOf(result) - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - clz := m.allocateInstr() - clz.asBitRR(bitOpClz, rd, rn.nr(), x.Type().Bits() == 64) - m.insert(clz) -} - -func (m *machine) lowerCtz(x, result ssa.Value) { - rd := m.compiler.VRegOf(result) - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rbit := m.allocateInstr() - _64 := x.Type().Bits() == 64 - var tmpReg regalloc.VReg - if _64 { - tmpReg = m.compiler.AllocateVReg(ssa.TypeI64) - } else { - tmpReg = m.compiler.AllocateVReg(ssa.TypeI32) - } - rbit.asBitRR(bitOpRbit, tmpReg, rn.nr(), _64) - m.insert(rbit) - - clz := m.allocateInstr() - clz.asBitRR(bitOpClz, rd, tmpReg, _64) - m.insert(clz) -} - -func (m *machine) lowerPopcnt(x, result ssa.Value) { - // arm64 doesn't have an instruction for population count on scalar register, - // so we use the vector instruction `cnt`. - // This is exactly what the official Go implements bits.OneCount. - // For example, "func () int { return bits.OneCount(10) }" is compiled as - // - // MOVD $10, R0 ;; Load 10. - // FMOVD R0, F0 - // VCNT V0.B8, V0.B8 - // UADDLV V0.B8, V0 - // - // In aarch64 asm, FMOVD is encoded as `ins`, VCNT is `cnt`, - // and the registers may use different names. In our encoding we use the following - // instructions: - // - // ins v0.d[0], x0 ;; mov from GPR to vec (FMOV above) is encoded as INS - // cnt v0.16b, v0.16b ;; we use vec arrangement 16b - // uaddlv h0, v0.8b ;; h0 is still v0 with the dest width specifier 'H', implied when src arrangement is 8b - // mov x5, v0.d[0] ;; finally we mov the result back to a GPR - // - - rd := m.compiler.VRegOf(result) - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - - rf1 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) - ins := m.allocateInstr() - ins.asMovToVec(rf1.nr(), rn, vecArrangementD, vecIndex(0)) - m.insert(ins) - - rf2 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) - cnt := m.allocateInstr() - cnt.asVecMisc(vecOpCnt, rf2.nr(), rf1, vecArrangement16B) - m.insert(cnt) - - rf3 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) - uaddlv := m.allocateInstr() - uaddlv.asVecLanes(vecOpUaddlv, rf3.nr(), rf2, vecArrangement8B) - m.insert(uaddlv) - - mov := m.allocateInstr() - mov.asMovFromVec(rd, rf3, vecArrangementD, vecIndex(0), false) - m.insert(mov) -} - -// lowerExitWithCode lowers the lowerExitWithCode takes a context pointer as argument. 
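// Editor's note (not part of the original file): concretely, the sequence
// emitted below stores the exit code, the current stack pointer, and the
// address of this exit point into the execution context pointed to by
// execCtxVReg, and then emits the exit sequence that hands control back to
// the Go runtime.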
-func (m *machine) lowerExitWithCode(execCtxVReg regalloc.VReg, code wazevoapi.ExitCode) { - tmpReg1 := m.compiler.AllocateVReg(ssa.TypeI32) - loadExitCodeConst := m.allocateInstr() - loadExitCodeConst.asMOVZ(tmpReg1, uint64(code), 0, true) - - setExitCode := m.allocateInstr() - mode := m.amodePool.Allocate() - *mode = addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(), - } - setExitCode.asStore(operandNR(tmpReg1), mode, 32) - - // In order to unwind the stack, we also need to push the current stack pointer: - tmp2 := m.compiler.AllocateVReg(ssa.TypeI64) - movSpToTmp := m.allocateInstr() - movSpToTmp.asMove64(tmp2, spVReg) - strSpToExecCtx := m.allocateInstr() - mode2 := m.amodePool.Allocate() - *mode2 = addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), - } - strSpToExecCtx.asStore(operandNR(tmp2), mode2, 64) - // Also the address of this exit. - tmp3 := m.compiler.AllocateVReg(ssa.TypeI64) - currentAddrToTmp := m.allocateInstr() - currentAddrToTmp.asAdr(tmp3, 0) - storeCurrentAddrToExecCtx := m.allocateInstr() - mode3 := m.amodePool.Allocate() - *mode3 = addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), - } - storeCurrentAddrToExecCtx.asStore(operandNR(tmp3), mode3, 64) - - exitSeq := m.allocateInstr() - exitSeq.asExitSequence(execCtxVReg) - - m.insert(loadExitCodeConst) - m.insert(setExitCode) - m.insert(movSpToTmp) - m.insert(strSpToExecCtx) - m.insert(currentAddrToTmp) - m.insert(storeCurrentAddrToExecCtx) - m.insert(exitSeq) -} - -func (m *machine) lowerIcmpToFlag(x, y ssa.Value, signed bool) { - if x.Type() != y.Type() { - panic( - fmt.Sprintf("TODO(maybe): support icmp with different types: v%d=%s != v%d=%s", - x.ID(), x.Type(), y.ID(), y.Type())) - } - - extMod := extModeOf(x.Type(), signed) - - // First operand must be in pure register form. - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extMod) - // Second operand can be in any of Imm12, ER, SR, or NR form supported by the SUBS instructions. - rm := m.getOperand_Imm12_ER_SR_NR(m.compiler.ValueDefinition(y), extMod) - - alu := m.allocateInstr() - // subs zr, rn, rm - alu.asALU( - aluOpSubS, - // We don't need the result, just need to set flags. - xzrVReg, - rn, - rm, - x.Type().Bits() == 64, - ) - m.insert(alu) -} - -func (m *machine) lowerFcmpToFlag(x, y ssa.Value) { - if x.Type() != y.Type() { - panic("TODO(maybe): support icmp with different types") - } - - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - cmp := m.allocateInstr() - cmp.asFpuCmp(rn, rm, x.Type().Bits() == 64) - m.insert(cmp) -} - -func (m *machine) lowerExitIfTrueWithCode(execCtxVReg regalloc.VReg, cond ssa.Value, code wazevoapi.ExitCode) { - condDef := m.compiler.ValueDefinition(cond) - if !m.compiler.MatchInstr(condDef, ssa.OpcodeIcmp) { - panic("TODO: OpcodeExitIfTrueWithCode must come after Icmp at the moment: " + condDef.Instr.Opcode().String()) - } - condDef.Instr.MarkLowered() - - cvalInstr := condDef.Instr - x, y, c := cvalInstr.IcmpData() - signed := c.Signed() - - if !m.tryLowerBandToFlag(x, y) { - m.lowerIcmpToFlag(x, y, signed) - } - - // We need to copy the execution context to a temp register, because if it's spilled, - // it might end up being reloaded inside the exiting branch. 
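	// Editor's note (not part of the original file): the shape emitted below is
	//
	//	b.<inverted cond>  continue   ; cbr, patched once the label is allocated
	//	<exit sequence>               ; lowerExitWithCode(execCtxTmp, code)
	//	continue:
	//
	// i.e. the conditional branch skips the whole exit sequence on the
	// non-trapping path, which is why cbr is allocated first and only given its
	// target after lowerExitWithCode has been emitted.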
- execCtxTmp := m.copyToTmp(execCtxVReg) - - // We have to skip the entire exit sequence if the condition is false. - cbr := m.allocateInstr() - m.insert(cbr) - m.lowerExitWithCode(execCtxTmp, code) - // conditional branch target is after exit. - l := m.insertBrTargetLabel() - cbr.asCondBr(condFlagFromSSAIntegerCmpCond(c).invert().asCond(), l, false /* ignored */) -} - -func (m *machine) lowerSelect(c, x, y, result ssa.Value) { - cvalDef := m.compiler.ValueDefinition(c) - - var cc condFlag - switch { - case m.compiler.MatchInstr(cvalDef, ssa.OpcodeIcmp): // This case, we can use the ALU flag set by SUBS instruction. - cvalInstr := cvalDef.Instr - x, y, c := cvalInstr.IcmpData() - cc = condFlagFromSSAIntegerCmpCond(c) - m.lowerIcmpToFlag(x, y, c.Signed()) - cvalDef.Instr.MarkLowered() - case m.compiler.MatchInstr(cvalDef, ssa.OpcodeFcmp): // This case we can use the Fpu flag directly. - cvalInstr := cvalDef.Instr - x, y, c := cvalInstr.FcmpData() - cc = condFlagFromSSAFloatCmpCond(c) - m.lowerFcmpToFlag(x, y) - cvalDef.Instr.MarkLowered() - default: - rn := m.getOperand_NR(cvalDef, extModeNone) - if c.Type() != ssa.TypeI32 && c.Type() != ssa.TypeI64 { - panic("TODO?BUG?: support select with non-integer condition") - } - alu := m.allocateInstr() - // subs zr, rn, zr - alu.asALU( - aluOpSubS, - // We don't need the result, just need to set flags. - xzrVReg, - rn, - operandNR(xzrVReg), - c.Type().Bits() == 64, - ) - m.insert(alu) - cc = ne - } - - rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - - rd := m.compiler.VRegOf(result) - switch x.Type() { - case ssa.TypeI32, ssa.TypeI64: - // csel rd, rn, rm, cc - csel := m.allocateInstr() - csel.asCSel(rd, rn, rm, cc, x.Type().Bits() == 64) - m.insert(csel) - case ssa.TypeF32, ssa.TypeF64: - // fcsel rd, rn, rm, cc - fcsel := m.allocateInstr() - fcsel.asFpuCSel(rd, rn, rm, cc, x.Type().Bits() == 64) - m.insert(fcsel) - default: - panic("BUG") - } -} - -func (m *machine) lowerSelectVec(rc, rn, rm operand, rd regalloc.VReg) { - // First check if `rc` is zero or not. - checkZero := m.allocateInstr() - checkZero.asALU(aluOpSubS, xzrVReg, rc, operandNR(xzrVReg), false) - m.insert(checkZero) - - // Then use CSETM to set all bits to one if `rc` is zero. - allOnesOrZero := m.compiler.AllocateVReg(ssa.TypeI64) - cset := m.allocateInstr() - cset.asCSet(allOnesOrZero, true, ne) - m.insert(cset) - - // Then move the bits to the result vector register. - tmp2 := m.compiler.AllocateVReg(ssa.TypeV128) - dup := m.allocateInstr() - dup.asVecDup(tmp2, operandNR(allOnesOrZero), vecArrangement2D) - m.insert(dup) - - // Now that `tmp2` has either all bits one or zero depending on `rc`, - // we can use bsl to select between `rn` and `rm`. - ins := m.allocateInstr() - ins.asVecRRRRewrite(vecOpBsl, tmp2, rn, rm, vecArrangement16B) - m.insert(ins) - - // Finally, move the result to the destination register. 
- mov2 := m.allocateInstr() - mov2.asFpuMov128(rd, tmp2) - m.insert(mov2) -} - -func (m *machine) lowerAtomicRmw(si *ssa.Instruction) { - ssaOp, size := si.AtomicRmwData() - - var op atomicRmwOp - var negateArg bool - var flipArg bool - switch ssaOp { - case ssa.AtomicRmwOpAdd: - op = atomicRmwOpAdd - case ssa.AtomicRmwOpSub: - op = atomicRmwOpAdd - negateArg = true - case ssa.AtomicRmwOpAnd: - op = atomicRmwOpClr - flipArg = true - case ssa.AtomicRmwOpOr: - op = atomicRmwOpSet - case ssa.AtomicRmwOpXor: - op = atomicRmwOpEor - case ssa.AtomicRmwOpXchg: - op = atomicRmwOpSwp - default: - panic(fmt.Sprintf("unknown ssa atomic rmw op: %s", ssaOp)) - } - - addr, val := si.Arg2() - addrDef, valDef := m.compiler.ValueDefinition(addr), m.compiler.ValueDefinition(val) - rn := m.getOperand_NR(addrDef, extModeNone) - rt := m.compiler.VRegOf(si.Return()) - rs := m.getOperand_NR(valDef, extModeNone) - - _64 := si.Return().Type().Bits() == 64 - var tmp regalloc.VReg - if _64 { - tmp = m.compiler.AllocateVReg(ssa.TypeI64) - } else { - tmp = m.compiler.AllocateVReg(ssa.TypeI32) - } - m.lowerAtomicRmwImpl(op, rn.nr(), rs.nr(), rt, tmp, size, negateArg, flipArg, _64) -} - -func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp regalloc.VReg, size uint64, negateArg, flipArg, dst64bit bool) { - switch { - case negateArg: - neg := m.allocateInstr() - neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), operandNR(rs), dst64bit) - m.insert(neg) - case flipArg: - flip := m.allocateInstr() - flip.asALU(aluOpOrn, tmp, operandNR(xzrVReg), operandNR(rs), dst64bit) - m.insert(flip) - default: - tmp = rs - } - - rmw := m.allocateInstr() - rmw.asAtomicRmw(op, rn, tmp, rt, size) - m.insert(rmw) -} - -func (m *machine) lowerAtomicCas(si *ssa.Instruction) { - addr, exp, repl := si.Arg3() - size := si.AtomicTargetSize() - - addrDef, expDef, replDef := m.compiler.ValueDefinition(addr), m.compiler.ValueDefinition(exp), m.compiler.ValueDefinition(repl) - rn := m.getOperand_NR(addrDef, extModeNone) - rt := m.getOperand_NR(replDef, extModeNone) - rs := m.getOperand_NR(expDef, extModeNone) - tmp := m.compiler.AllocateVReg(si.Return().Type()) - - _64 := si.Return().Type().Bits() == 64 - // rs is overwritten by CAS, so we need to move it to the result register before the instruction - // in case when it is used somewhere else. 
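	// Editor's note (not part of the original file): the AArch64 CAS instruction
	// compares memory at [Rn] against Rs and, whether or not the store of Rt
	// happens, writes the value it read from memory back into Rs. Copying the
	// expected value into `tmp` first therefore keeps the original ssa value
	// intact and leaves the loaded value in `tmp`, which is then moved to the
	// instruction's result register below.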
- mov := m.allocateInstr() - if _64 { - mov.asMove64(tmp, rs.nr()) - } else { - mov.asMove32(tmp, rs.nr()) - } - m.insert(mov) - - m.lowerAtomicCasImpl(rn.nr(), tmp, rt.nr(), size) - - mov2 := m.allocateInstr() - rd := m.compiler.VRegOf(si.Return()) - if _64 { - mov2.asMove64(rd, tmp) - } else { - mov2.asMove32(rd, tmp) - } - m.insert(mov2) -} - -func (m *machine) lowerAtomicCasImpl(rn, rs, rt regalloc.VReg, size uint64) { - cas := m.allocateInstr() - cas.asAtomicCas(rn, rs, rt, size) - m.insert(cas) -} - -func (m *machine) lowerAtomicLoad(si *ssa.Instruction) { - addr := si.Arg() - size := si.AtomicTargetSize() - - addrDef := m.compiler.ValueDefinition(addr) - rn := m.getOperand_NR(addrDef, extModeNone) - rt := m.compiler.VRegOf(si.Return()) - - m.lowerAtomicLoadImpl(rn.nr(), rt, size) -} - -func (m *machine) lowerAtomicLoadImpl(rn, rt regalloc.VReg, size uint64) { - ld := m.allocateInstr() - ld.asAtomicLoad(rn, rt, size) - m.insert(ld) -} - -func (m *machine) lowerAtomicStore(si *ssa.Instruction) { - addr, val := si.Arg2() - size := si.AtomicTargetSize() - - addrDef := m.compiler.ValueDefinition(addr) - valDef := m.compiler.ValueDefinition(val) - rn := m.getOperand_NR(addrDef, extModeNone) - rt := m.getOperand_NR(valDef, extModeNone) - - m.lowerAtomicStoreImpl(rn, rt, size) -} - -func (m *machine) lowerAtomicStoreImpl(rn, rt operand, size uint64) { - ld := m.allocateInstr() - ld.asAtomicStore(rn, rt, size) - m.insert(ld) -} - -// copyToTmp copies the given regalloc.VReg to a temporary register. This is called before cbr to avoid the regalloc issue -// e.g. reload happening in the middle of the exit sequence which is not the path the normal path executes -func (m *machine) copyToTmp(v regalloc.VReg) regalloc.VReg { - typ := m.compiler.TypeOf(v) - mov := m.allocateInstr() - tmp := m.compiler.AllocateVReg(typ) - if typ.IsInt() { - mov.asMove64(tmp, v) - } else { - mov.asFpuMov128(tmp, v) - } - m.insert(mov) - return tmp -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go deleted file mode 100644 index 7a398c3d0..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go +++ /dev/null @@ -1,340 +0,0 @@ -package arm64 - -// This file contains the logic to "find and determine operands" for instructions. -// In order to finalize the form of an operand, we might end up merging/eliminating -// the source instructions into an operand whenever possible. - -import ( - "fmt" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -type ( - // operand represents an operand of an instruction whose type is determined by the kind. - operand struct { - kind operandKind - data, data2 uint64 - } - operandKind byte -) - -// Here's the list of operand kinds. We use the abbreviation of the kind name not only for these consts, -// but also names of functions which return the operand of the kind. -const ( - // operandKindNR represents "NormalRegister" (NR). This is literally the register without any special operation unlike others. - operandKindNR operandKind = iota - // operandKindSR represents "Shifted Register" (SR). This is a register which is shifted by a constant. - // Some of the arm64 instructions can take this kind of operand. 
- operandKindSR - // operandKindER represents "Extended Register (ER). This is a register which is sign/zero-extended to a larger size. - // Some of the arm64 instructions can take this kind of operand. - operandKindER - // operandKindImm12 represents "Immediate 12" (Imm12). This is a 12-bit immediate value which can be either shifted by 12 or not. - // See asImm12 function for detail. - operandKindImm12 - // operandKindShiftImm represents "Shifted Immediate" (ShiftImm) used by shift operations. - operandKindShiftImm -) - -// String implements fmt.Stringer for debugging. -func (o operand) format(size byte) string { - switch o.kind { - case operandKindNR: - return formatVRegSized(o.nr(), size) - case operandKindSR: - r, amt, sop := o.sr() - return fmt.Sprintf("%s, %s #%d", formatVRegSized(r, size), sop, amt) - case operandKindER: - r, eop, _ := o.er() - return fmt.Sprintf("%s %s", formatVRegSized(r, size), eop) - case operandKindImm12: - imm12, shiftBit := o.imm12() - if shiftBit == 1 { - return fmt.Sprintf("#%#x", uint64(imm12)<<12) - } else { - return fmt.Sprintf("#%#x", imm12) - } - default: - panic(fmt.Sprintf("unknown operand kind: %d", o.kind)) - } -} - -// operandNR encodes the given VReg as an operand of operandKindNR. -func operandNR(r regalloc.VReg) operand { - return operand{kind: operandKindNR, data: uint64(r)} -} - -// nr decodes the underlying VReg assuming the operand is of operandKindNR. -func (o operand) nr() regalloc.VReg { - return regalloc.VReg(o.data) -} - -// operandER encodes the given VReg as an operand of operandKindER. -func operandER(r regalloc.VReg, eop extendOp, to byte) operand { - if to < 32 { - panic("TODO?BUG?: when we need to extend to less than 32 bits?") - } - return operand{kind: operandKindER, data: uint64(r), data2: uint64(eop)<<32 | uint64(to)} -} - -// er decodes the underlying VReg, extend operation, and the target size assuming the operand is of operandKindER. -func (o operand) er() (r regalloc.VReg, eop extendOp, to byte) { - return regalloc.VReg(o.data), extendOp(o.data2>>32) & 0xff, byte(o.data2 & 0xff) -} - -// operandSR encodes the given VReg as an operand of operandKindSR. -func operandSR(r regalloc.VReg, amt byte, sop shiftOp) operand { - return operand{kind: operandKindSR, data: uint64(r), data2: uint64(amt)<<32 | uint64(sop)} -} - -// sr decodes the underlying VReg, shift amount, and shift operation assuming the operand is of operandKindSR. -func (o operand) sr() (r regalloc.VReg, amt byte, sop shiftOp) { - return regalloc.VReg(o.data), byte(o.data2>>32) & 0xff, shiftOp(o.data2) & 0xff -} - -// operandImm12 encodes the given imm12 as an operand of operandKindImm12. -func operandImm12(imm12 uint16, shiftBit byte) operand { - return operand{kind: operandKindImm12, data: uint64(imm12) | uint64(shiftBit)<<32} -} - -// imm12 decodes the underlying imm12 data assuming the operand is of operandKindImm12. -func (o operand) imm12() (v uint16, shiftBit byte) { - return uint16(o.data), byte(o.data >> 32) -} - -// operandShiftImm encodes the given amount as an operand of operandKindShiftImm. -func operandShiftImm(amount byte) operand { - return operand{kind: operandKindShiftImm, data: uint64(amount)} -} - -// shiftImm decodes the underlying shift amount data assuming the operand is of operandKindShiftImm. -func (o operand) shiftImm() byte { - return byte(o.data) -} - -// reg returns the register of the operand if applicable. 
-func (o operand) reg() regalloc.VReg { - switch o.kind { - case operandKindNR: - return o.nr() - case operandKindSR: - r, _, _ := o.sr() - return r - case operandKindER: - r, _, _ := o.er() - return r - case operandKindImm12: - // Does not have a register. - case operandKindShiftImm: - // Does not have a register. - default: - panic(o.kind) - } - return regalloc.VRegInvalid -} - -func (o operand) realReg() regalloc.RealReg { - return o.nr().RealReg() -} - -func (o operand) assignReg(v regalloc.VReg) operand { - switch o.kind { - case operandKindNR: - return operandNR(v) - case operandKindSR: - _, amt, sop := o.sr() - return operandSR(v, amt, sop) - case operandKindER: - _, eop, to := o.er() - return operandER(v, eop, to) - case operandKindImm12: - // Does not have a register. - case operandKindShiftImm: - // Does not have a register. - } - panic(o.kind) -} - -// ensureValueNR returns an operand of either operandKindER, operandKindSR, or operandKindNR from the given value (defined by `def). -// -// `mode` is used to extend the operand if the bit length is smaller than mode.bits(). -// If the operand can be expressed as operandKindImm12, `mode` is ignored. -func (m *machine) getOperand_Imm12_ER_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { - if !def.IsFromInstr() { - return operandNR(m.compiler.VRegOf(def.V)) - } - - instr := def.Instr - if instr.Opcode() == ssa.OpcodeIconst { - if imm12Op, ok := asImm12Operand(instr.ConstantVal()); ok { - instr.MarkLowered() - return imm12Op - } - } - return m.getOperand_ER_SR_NR(def, mode) -} - -// getOperand_MaybeNegatedImm12_ER_SR_NR is almost the same as getOperand_Imm12_ER_SR_NR, but this might negate the immediate value. -// If the immediate value is negated, the second return value is true, otherwise always false. -func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand, negatedImm12 bool) { - if !def.IsFromInstr() { - return operandNR(m.compiler.VRegOf(def.V)), false - } - - instr := def.Instr - if instr.Opcode() == ssa.OpcodeIconst { - c := instr.ConstantVal() - if imm12Op, ok := asImm12Operand(c); ok { - instr.MarkLowered() - return imm12Op, false - } - - signExtended := int64(c) - if def.V.Type().Bits() == 32 { - signExtended = (signExtended << 32) >> 32 - } - negatedWithoutSign := -signExtended - if imm12Op, ok := asImm12Operand(uint64(negatedWithoutSign)); ok { - instr.MarkLowered() - return imm12Op, true - } - } - return m.getOperand_ER_SR_NR(def, mode), false -} - -// ensureValueNR returns an operand of either operandKindER, operandKindSR, or operandKindNR from the given value (defined by `def). -// -// `mode` is used to extend the operand if the bit length is smaller than mode.bits(). 
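// Editor's sketch (not part of the original file): the "Imm12" form accepted by
// the two getOperand_*Imm12* helpers above covers exactly the ALU immediates
// AArch64 can encode: a 12-bit value, optionally shifted left by 12. A minimal
// stand-alone check equivalent to asImm12 further below would look like:
//
//	func fitsImm12(v uint64) (imm uint16, lsl12, ok bool) {
//		switch {
//		case v&^uint64(0xfff) == 0:
//			return uint16(v), false, true // 0x0 .. 0xfff
//		case v&^uint64(0xfff_000) == 0:
//			return uint16(v >> 12), true, true // 0x1000, 0x2000, .. 0xfff000
//		default:
//			return 0, false, false // e.g. 0x1001 must go through a register
//		}
//	}
//
// The MaybeNegated variant additionally tries the negated constant, so e.g.
// x - 0xfffff000 on a 32-bit value can still be encoded as an ADD with
// immediate 0x1000.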
-func (m *machine) getOperand_ER_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { - if !def.IsFromInstr() { - return operandNR(m.compiler.VRegOf(def.V)) - } - - if m.compiler.MatchInstr(def, ssa.OpcodeSExtend) || m.compiler.MatchInstr(def, ssa.OpcodeUExtend) { - extInstr := def.Instr - - signed := extInstr.Opcode() == ssa.OpcodeSExtend - innerExtFromBits, innerExtToBits := extInstr.ExtendFromToBits() - modeBits, modeSigned := mode.bits(), mode.signed() - if mode == extModeNone || innerExtToBits == modeBits { - eop := extendOpFrom(signed, innerExtFromBits) - extArg := m.getOperand_NR(m.compiler.ValueDefinition(extInstr.Arg()), extModeNone) - op = operandER(extArg.nr(), eop, innerExtToBits) - extInstr.MarkLowered() - return - } - - if innerExtToBits > modeBits { - panic("BUG?TODO?: need the results of inner extension to be larger than the mode") - } - - switch { - case (!signed && !modeSigned) || (signed && modeSigned): - // Two sign/zero extensions are equivalent to one sign/zero extension for the larger size. - eop := extendOpFrom(modeSigned, innerExtFromBits) - op = operandER(m.compiler.VRegOf(extInstr.Arg()), eop, modeBits) - extInstr.MarkLowered() - case (signed && !modeSigned) || (!signed && modeSigned): - // We need to {sign, zero}-extend the result of the {zero,sign} extension. - eop := extendOpFrom(modeSigned, innerExtToBits) - op = operandER(m.compiler.VRegOf(extInstr.Return()), eop, modeBits) - // Note that we failed to merge the inner extension instruction this case. - } - return - } - return m.getOperand_SR_NR(def, mode) -} - -// ensureValueNR returns an operand of either operandKindSR or operandKindNR from the given value (defined by `def). -// -// `mode` is used to extend the operand if the bit length is smaller than mode.bits(). -func (m *machine) getOperand_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { - if !def.IsFromInstr() { - return operandNR(m.compiler.VRegOf(def.V)) - } - - if m.compiler.MatchInstr(def, ssa.OpcodeIshl) { - // Check if the shift amount is constant instruction. - targetVal, amountVal := def.Instr.Arg2() - targetVReg := m.getOperand_NR(m.compiler.ValueDefinition(targetVal), extModeNone).nr() - amountDef := m.compiler.ValueDefinition(amountVal) - if amountDef.IsFromInstr() && amountDef.Instr.Constant() { - // If that is the case, we can use the shifted register operand (SR). - c := byte(amountDef.Instr.ConstantVal()) & (targetVal.Type().Bits() - 1) // Clears the unnecessary bits. - def.Instr.MarkLowered() - amountDef.Instr.MarkLowered() - return operandSR(targetVReg, c, shiftOpLSL) - } - } - return m.getOperand_NR(def, mode) -} - -// getOperand_ShiftImm_NR returns an operand of either operandKindShiftImm or operandKindNR from the given value (defined by `def). -func (m *machine) getOperand_ShiftImm_NR(def backend.SSAValueDefinition, mode extMode, shiftBitWidth byte) (op operand) { - if !def.IsFromInstr() { - return operandNR(m.compiler.VRegOf(def.V)) - } - - instr := def.Instr - if instr.Constant() { - amount := byte(instr.ConstantVal()) & (shiftBitWidth - 1) // Clears the unnecessary bits. - return operandShiftImm(amount) - } - return m.getOperand_NR(def, mode) -} - -// ensureValueNR returns an operand of operandKindNR from the given value (defined by `def). -// -// `mode` is used to extend the operand if the bit length is smaller than mode.bits(). 
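// Editor's note (not part of the original file): the getOperand_SR_NR folding
// above is what lets a constant-amount shift feeding another instruction be
// merged into a single shifted-register operand, e.g. band(x, ishl(y, 2)) can
// be selected as one `and rd, rx, ry, lsl #2` instead of a separate shift
// followed by an and.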
-func (m *machine) getOperand_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { - var v regalloc.VReg - if def.IsFromInstr() && def.Instr.Constant() { - // We inline all the constant instructions so that we could reduce the register usage. - v = m.lowerConstant(def.Instr) - def.Instr.MarkLowered() - } else { - v = m.compiler.VRegOf(def.V) - } - - r := v - switch inBits := def.V.Type().Bits(); { - case mode == extModeNone: - case inBits == 32 && (mode == extModeZeroExtend32 || mode == extModeSignExtend32): - case inBits == 32 && mode == extModeZeroExtend64: - extended := m.compiler.AllocateVReg(ssa.TypeI64) - ext := m.allocateInstr() - ext.asExtend(extended, v, 32, 64, false) - m.insert(ext) - r = extended - case inBits == 32 && mode == extModeSignExtend64: - extended := m.compiler.AllocateVReg(ssa.TypeI64) - ext := m.allocateInstr() - ext.asExtend(extended, v, 32, 64, true) - m.insert(ext) - r = extended - case inBits == 64 && (mode == extModeZeroExtend64 || mode == extModeSignExtend64): - } - return operandNR(r) -} - -func asImm12Operand(val uint64) (op operand, ok bool) { - v, shiftBit, ok := asImm12(val) - if !ok { - return operand{}, false - } - return operandImm12(v, shiftBit), true -} - -func asImm12(val uint64) (v uint16, shiftBit byte, ok bool) { - const mask1, mask2 uint64 = 0xfff, 0xfff_000 - if val&^mask1 == 0 { - return uint16(val), 0, true - } else if val&^mask2 == 0 { - return uint16(val >> 12), 1, true - } else { - return 0, 0, false - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go deleted file mode 100644 index fd0760d72..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go +++ /dev/null @@ -1,451 +0,0 @@ -package arm64 - -import ( - "fmt" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -type ( - // addressMode represents an ARM64 addressing mode. - // - // https://developer.arm.com/documentation/102374/0101/Loads-and-stores---addressing - // TODO: use the bit-packed layout like operand struct. - addressMode struct { - kind addressModeKind - rn, rm regalloc.VReg - extOp extendOp - imm int64 - } - - // addressModeKind represents the kind of ARM64 addressing mode. - addressModeKind byte -) - -func resetAddressMode(a *addressMode) { - a.kind = 0 - a.rn = 0 - a.rm = 0 - a.extOp = 0 - a.imm = 0 -} - -const ( - // addressModeKindRegExtended takes a base register and an index register. The index register is sign/zero-extended, - // and then scaled by bits(type)/8. - // - // e.g. 
- // - ldrh w1, [x2, w3, SXTW #1] ;; sign-extended and scaled by 2 (== LSL #1) - // - strh w1, [x2, w3, UXTW #1] ;; zero-extended and scaled by 2 (== LSL #1) - // - ldr w1, [x2, w3, SXTW #2] ;; sign-extended and scaled by 4 (== LSL #2) - // - str x1, [x2, w3, UXTW #3] ;; zero-extended and scaled by 8 (== LSL #3) - // - // See the following pages: - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--register---Load-Register-Halfword--register-- - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--register---Load-Register--register-- - addressModeKindRegScaledExtended addressModeKind = iota - - // addressModeKindRegScaled is the same as addressModeKindRegScaledExtended, but without extension factor. - addressModeKindRegScaled - - // addressModeKindRegScaled is the same as addressModeKindRegScaledExtended, but without scale factor. - addressModeKindRegExtended - - // addressModeKindRegReg takes a base register and an index register. The index register is not either scaled or extended. - addressModeKindRegReg - - // addressModeKindRegSignedImm9 takes a base register and a 9-bit "signed" immediate offset (-256 to 255). - // The immediate will be sign-extended, and be added to the base register. - // This is a.k.a. "unscaled" since the immediate is not scaled. - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled-- - addressModeKindRegSignedImm9 - - // addressModeKindRegUnsignedImm12 takes a base register and a 12-bit "unsigned" immediate offset. scaled by - // the size of the type. In other words, the actual offset will be imm12 * bits(type)/8. - // See "Unsigned offset" in the following pages: - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate-- - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- - addressModeKindRegUnsignedImm12 - - // addressModePostIndex takes a base register and a 9-bit "signed" immediate offset. - // After the load/store, the base register will be updated by the offset. - // - // Note that when this is used for pair load/store, the offset will be 7-bit "signed" immediate offset. - // - // See "Post-index" in the following pages for examples: - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate-- - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers- - addressModeKindPostIndex - - // addressModePostIndex takes a base register and a 9-bit "signed" immediate offset. - // Before the load/store, the base register will be updated by the offset. - // - // Note that when this is used for pair load/store, the offset will be 7-bit "signed" immediate offset. 
- // - // See "Pre-index" in the following pages for examples: - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate-- - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers- - addressModeKindPreIndex - - // addressModeKindArgStackSpace is used to resolve the address of the argument stack space - // exiting right above the stack pointer. Since we don't know the exact stack space needed for a function - // at a compilation phase, this is used as a placeholder and further lowered to a real addressing mode like above. - addressModeKindArgStackSpace - - // addressModeKindResultStackSpace is used to resolve the address of the result stack space - // exiting right above the stack pointer. Since we don't know the exact stack space needed for a function - // at a compilation phase, this is used as a placeholder and further lowered to a real addressing mode like above. - addressModeKindResultStackSpace -) - -func (a addressMode) format(dstSizeBits byte) (ret string) { - base := formatVRegSized(a.rn, 64) - if rn := a.rn; rn.RegType() != regalloc.RegTypeInt { - panic("invalid base register type: " + a.rn.RegType().String()) - } else if rn.IsRealReg() && v0 <= a.rn.RealReg() && a.rn.RealReg() <= v30 { - panic("BUG: likely a bug in reg alloc or reset behavior") - } - - switch a.kind { - case addressModeKindRegScaledExtended: - amount := a.sizeInBitsToShiftAmount(dstSizeBits) - ret = fmt.Sprintf("[%s, %s, %s #%#x]", base, formatVRegSized(a.rm, a.indexRegBits()), a.extOp, amount) - case addressModeKindRegScaled: - amount := a.sizeInBitsToShiftAmount(dstSizeBits) - ret = fmt.Sprintf("[%s, %s, lsl #%#x]", base, formatVRegSized(a.rm, a.indexRegBits()), amount) - case addressModeKindRegExtended: - ret = fmt.Sprintf("[%s, %s, %s]", base, formatVRegSized(a.rm, a.indexRegBits()), a.extOp) - case addressModeKindRegReg: - ret = fmt.Sprintf("[%s, %s]", base, formatVRegSized(a.rm, a.indexRegBits())) - case addressModeKindRegSignedImm9: - if a.imm != 0 { - ret = fmt.Sprintf("[%s, #%#x]", base, a.imm) - } else { - ret = fmt.Sprintf("[%s]", base) - } - case addressModeKindRegUnsignedImm12: - if a.imm != 0 { - ret = fmt.Sprintf("[%s, #%#x]", base, a.imm) - } else { - ret = fmt.Sprintf("[%s]", base) - } - case addressModeKindPostIndex: - ret = fmt.Sprintf("[%s], #%#x", base, a.imm) - case addressModeKindPreIndex: - ret = fmt.Sprintf("[%s, #%#x]!", base, a.imm) - case addressModeKindArgStackSpace: - ret = fmt.Sprintf("[#arg_space, #%#x]", a.imm) - case addressModeKindResultStackSpace: - ret = fmt.Sprintf("[#ret_space, #%#x]", a.imm) - } - return -} - -func addressModePreOrPostIndex(m *machine, rn regalloc.VReg, imm int64, preIndex bool) *addressMode { - if !offsetFitsInAddressModeKindRegSignedImm9(imm) { - panic(fmt.Sprintf("BUG: offset %#x does not fit in addressModeKindRegSignedImm9", imm)) - } - mode := m.amodePool.Allocate() - if preIndex { - *mode = addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm} - } else { - *mode = addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm} - } - return mode -} - -func offsetFitsInAddressModeKindRegUnsignedImm12(dstSizeInBits byte, offset int64) bool { - divisor := int64(dstSizeInBits) / 8 - 
return 0 < offset && offset%divisor == 0 && offset/divisor < 4096 -} - -func offsetFitsInAddressModeKindRegSignedImm9(offset int64) bool { - return -256 <= offset && offset <= 255 -} - -func (a addressMode) indexRegBits() byte { - bits := a.extOp.srcBits() - if bits != 32 && bits != 64 { - panic("invalid index register for address mode. it must be either 32 or 64 bits") - } - return bits -} - -func (a addressMode) sizeInBitsToShiftAmount(sizeInBits byte) (lsl byte) { - switch sizeInBits { - case 8: - lsl = 0 - case 16: - lsl = 1 - case 32: - lsl = 2 - case 64: - lsl = 3 - } - return -} - -func extLoadSignSize(op ssa.Opcode) (size byte, signed bool) { - switch op { - case ssa.OpcodeUload8: - size, signed = 8, false - case ssa.OpcodeUload16: - size, signed = 16, false - case ssa.OpcodeUload32: - size, signed = 32, false - case ssa.OpcodeSload8: - size, signed = 8, true - case ssa.OpcodeSload16: - size, signed = 16, true - case ssa.OpcodeSload32: - size, signed = 32, true - default: - panic("BUG") - } - return -} - -func (m *machine) lowerExtLoad(op ssa.Opcode, ptr ssa.Value, offset uint32, ret regalloc.VReg) { - size, signed := extLoadSignSize(op) - amode := m.lowerToAddressMode(ptr, offset, size) - load := m.allocateInstr() - if signed { - load.asSLoad(ret, amode, size) - } else { - load.asULoad(ret, amode, size) - } - m.insert(load) -} - -func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, ret ssa.Value) { - amode := m.lowerToAddressMode(ptr, offset, typ.Bits()) - - dst := m.compiler.VRegOf(ret) - load := m.allocateInstr() - switch typ { - case ssa.TypeI32, ssa.TypeI64: - load.asULoad(dst, amode, typ.Bits()) - case ssa.TypeF32, ssa.TypeF64: - load.asFpuLoad(dst, amode, typ.Bits()) - case ssa.TypeV128: - load.asFpuLoad(dst, amode, 128) - default: - panic("TODO") - } - m.insert(load) -} - -func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, lane ssa.VecLane, ret ssa.Value) { - // vecLoad1R has offset address mode (base+imm) only for post index, so we simply add the offset to the base. - base := m.getOperand_NR(m.compiler.ValueDefinition(ptr), extModeNone).nr() - offsetReg := m.compiler.AllocateVReg(ssa.TypeI64) - m.lowerConstantI64(offsetReg, int64(offset)) - addedBase := m.addReg64ToReg64(base, offsetReg) - - rd := m.compiler.VRegOf(ret) - - ld1r := m.allocateInstr() - ld1r.asVecLoad1R(rd, operandNR(addedBase), ssaLaneToArrangement(lane)) - m.insert(ld1r) -} - -func (m *machine) lowerStore(si *ssa.Instruction) { - // TODO: merge consecutive stores into a single pair store instruction. - value, ptr, offset, storeSizeInBits := si.StoreData() - amode := m.lowerToAddressMode(ptr, offset, storeSizeInBits) - - valueOp := m.getOperand_NR(m.compiler.ValueDefinition(value), extModeNone) - store := m.allocateInstr() - store.asStore(valueOp, amode, storeSizeInBits) - m.insert(store) -} - -// lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions. -func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode *addressMode) { - // TODO: currently the instruction selection logic doesn't support addressModeKindRegScaledExtended and - // addressModeKindRegScaled since collectAddends doesn't take ssa.OpcodeIshl into account. This should be fixed - // to support more efficient address resolution. 
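	// Editor's note (not part of the original file): the overall strategy below
	// is to split the pointer into addends (sign/zero-extended 32-bit registers,
	// 64-bit registers, and a constant), pick one addressing mode that can absorb
	// as much as possible, and then materialize whatever is left over with
	// explicit ADDs into a fresh base register. The unsigned-imm12 form can only
	// absorb offsets that are positive multiples of the access size with
	// offset/size < 4096, e.g. up to #32760 for an 8-byte load.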
- - a32s, a64s, offset := m.collectAddends(ptr) - offset += int64(offsetBase) - return m.lowerToAddressModeFromAddends(a32s, a64s, size, offset) -} - -// lowerToAddressModeFromAddends creates an addressMode from a list of addends collected by collectAddends. -// During the construction, this might emit additional instructions. -// -// Extracted as a separate function for easy testing. -func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode *addressMode) { - amode = m.amodePool.Allocate() - switch a64sExist, a32sExist := !a64s.Empty(), !a32s.Empty(); { - case a64sExist && a32sExist: - var base regalloc.VReg - base = a64s.Dequeue() - var a32 addend32 - a32 = a32s.Dequeue() - *amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext} - case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset): - var base regalloc.VReg - base = a64s.Dequeue() - *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset} - offset = 0 - case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset): - var base regalloc.VReg - base = a64s.Dequeue() - *amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset} - offset = 0 - case a64sExist: - var base regalloc.VReg - base = a64s.Dequeue() - if !a64s.Empty() { - index := a64s.Dequeue() - *amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */} - } else { - *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} - } - case a32sExist: - base32 := a32s.Dequeue() - - // First we need 64-bit base. - base := m.compiler.AllocateVReg(ssa.TypeI64) - baseExt := m.allocateInstr() - var signed bool - if base32.ext == extendOpSXTW { - signed = true - } - baseExt.asExtend(base, base32.r, 32, 64, signed) - m.insert(baseExt) - - if !a32s.Empty() { - index := a32s.Dequeue() - *amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext} - } else { - *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} - } - default: // Only static offsets. - tmpReg := m.compiler.AllocateVReg(ssa.TypeI64) - m.lowerConstantI64(tmpReg, offset) - *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0} - offset = 0 - } - - baseReg := amode.rn - if offset > 0 { - baseReg = m.addConstToReg64(baseReg, offset) // baseReg += offset - } - - for !a64s.Empty() { - a64 := a64s.Dequeue() - baseReg = m.addReg64ToReg64(baseReg, a64) // baseReg += a64 - } - - for !a32s.Empty() { - a32 := a32s.Dequeue() - baseReg = m.addRegToReg64Ext(baseReg, a32.r, a32.ext) // baseReg += (a32 extended to 64-bit) - } - amode.rn = baseReg - return -} - -var addendsMatchOpcodes = [4]ssa.Opcode{ssa.OpcodeUExtend, ssa.OpcodeSExtend, ssa.OpcodeIadd, ssa.OpcodeIconst} - -func (m *machine) collectAddends(ptr ssa.Value) (addends32 *wazevoapi.Queue[addend32], addends64 *wazevoapi.Queue[regalloc.VReg], offset int64) { - m.addendsWorkQueue.Reset() - m.addends32.Reset() - m.addends64.Reset() - m.addendsWorkQueue.Enqueue(ptr) - - for !m.addendsWorkQueue.Empty() { - v := m.addendsWorkQueue.Dequeue() - - def := m.compiler.ValueDefinition(v) - switch op := m.compiler.MatchInstrOneOf(def, addendsMatchOpcodes[:]); op { - case ssa.OpcodeIadd: - // If the addend is an add, we recursively collect its operands. 
- x, y := def.Instr.Arg2() - m.addendsWorkQueue.Enqueue(x) - m.addendsWorkQueue.Enqueue(y) - def.Instr.MarkLowered() - case ssa.OpcodeIconst: - // If the addend is constant, we just statically merge it into the offset. - ic := def.Instr - u64 := ic.ConstantVal() - if ic.Return().Type().Bits() == 32 { - offset += int64(int32(u64)) // sign-extend. - } else { - offset += int64(u64) - } - def.Instr.MarkLowered() - case ssa.OpcodeUExtend, ssa.OpcodeSExtend: - input := def.Instr.Arg() - if input.Type().Bits() != 32 { - panic("illegal size: " + input.Type().String()) - } - - var ext extendOp - if op == ssa.OpcodeUExtend { - ext = extendOpUXTW - } else { - ext = extendOpSXTW - } - - inputDef := m.compiler.ValueDefinition(input) - constInst := inputDef.IsFromInstr() && inputDef.Instr.Constant() - switch { - case constInst && ext == extendOpUXTW: - // Zero-extension of a 32-bit constant can be merged into the offset. - offset += int64(uint32(inputDef.Instr.ConstantVal())) - case constInst && ext == extendOpSXTW: - // Sign-extension of a 32-bit constant can be merged into the offset. - offset += int64(int32(inputDef.Instr.ConstantVal())) // sign-extend! - default: - m.addends32.Enqueue(addend32{r: m.getOperand_NR(inputDef, extModeNone).nr(), ext: ext}) - } - def.Instr.MarkLowered() - continue - default: - // If the addend is not one of them, we simply use it as-is (without merging!), optionally zero-extending it. - m.addends64.Enqueue(m.getOperand_NR(def, extModeZeroExtend64 /* optional zero ext */).nr()) - } - } - return &m.addends32, &m.addends64, offset -} - -func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) { - rd = m.compiler.AllocateVReg(ssa.TypeI64) - alu := m.allocateInstr() - if imm12Op, ok := asImm12Operand(uint64(c)); ok { - alu.asALU(aluOpAdd, rd, operandNR(r), imm12Op, true) - } else if imm12Op, ok = asImm12Operand(uint64(-c)); ok { - alu.asALU(aluOpSub, rd, operandNR(r), imm12Op, true) - } else { - tmp := m.compiler.AllocateVReg(ssa.TypeI64) - m.load64bitConst(c, tmp) - alu.asALU(aluOpAdd, rd, operandNR(r), operandNR(tmp), true) - } - m.insert(alu) - return -} - -func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) { - rd = m.compiler.AllocateVReg(ssa.TypeI64) - alu := m.allocateInstr() - alu.asALU(aluOpAdd, rd, operandNR(rn), operandNR(rm), true) - m.insert(alu) - return -} - -func (m *machine) addRegToReg64Ext(rn, rm regalloc.VReg, ext extendOp) (rd regalloc.VReg) { - rd = m.compiler.AllocateVReg(ssa.TypeI64) - alu := m.allocateInstr() - alu.asALU(aluOpAdd, rd, operandNR(rn), operandER(rm, ext, 64), true) - m.insert(alu) - return -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go deleted file mode 100644 index 00e6b238f..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go +++ /dev/null @@ -1,631 +0,0 @@ -package arm64 - -import ( - "context" - "fmt" - "math" - "strings" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -type ( - // machine implements backend.Machine. 
- machine struct { - compiler backend.Compiler - currentABI *backend.FunctionABI - instrPool wazevoapi.Pool[instruction] - // labelPositionPool is the pool of labelPosition. The id is the label where - // if the label is less than the maxSSABlockID, it's the ssa.BasicBlockID. - labelPositionPool wazevoapi.IDedPool[labelPosition] - - // nextLabel is the next label to be allocated. The first free label comes after maxSSABlockID - // so that we can have an identical label for the SSA block ID, which is useful for debugging. - nextLabel label - // rootInstr is the first instruction of the function. - rootInstr *instruction - // currentLabelPos is the currently-compiled ssa.BasicBlock's labelPosition. - currentLabelPos *labelPosition - // orderedSSABlockLabelPos is the ordered list of labelPosition in the generated code for each ssa.BasicBlock. - orderedSSABlockLabelPos []*labelPosition - // returnLabelPos is the labelPosition for the return block. - returnLabelPos labelPosition - // perBlockHead and perBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock. - perBlockHead, perBlockEnd *instruction - // pendingInstructions are the instructions which are not yet emitted into the instruction list. - pendingInstructions []*instruction - // maxSSABlockID is the maximum ssa.BasicBlockID in the current function. - maxSSABlockID label - - regAlloc regalloc.Allocator[*instruction, *labelPosition, *regAllocFn] - regAllocFn regAllocFn - - amodePool wazevoapi.Pool[addressMode] - - // addendsWorkQueue is used during address lowering, defined here for reuse. - addendsWorkQueue wazevoapi.Queue[ssa.Value] - addends32 wazevoapi.Queue[addend32] - // addends64 is used during address lowering, defined here for reuse. - addends64 wazevoapi.Queue[regalloc.VReg] - unresolvedAddressModes []*instruction - - // condBrRelocs holds the conditional branches which need offset relocation. - condBrRelocs []condBrReloc - - // jmpTableTargets holds the labels of the jump table targets. - jmpTableTargets [][]uint32 - // jmpTableTargetNext is the index to the jmpTableTargets slice to be used for the next jump table. - jmpTableTargetsNext int - - // spillSlotSize is the size of the stack slot in bytes used for spilling registers. - // During the execution of the function, the stack looks like: - // - // - // (high address) - // +-----------------+ - // | ....... | - // | ret Y | - // | ....... | - // | ret 0 | - // | arg X | - // | ....... | - // | arg 1 | - // | arg 0 | - // | xxxxx | - // | ReturnAddress | - // +-----------------+ <<-| - // | ........... | | - // | spill slot M | | <--- spillSlotSize - // | ............ | | - // | spill slot 2 | | - // | spill slot 1 | <<-+ - // | clobbered N | - // | ........... | - // | clobbered 1 | - // | clobbered 0 | - // SP---> +-----------------+ - // (low address) - // - // and it represents the size of the space between FP and the first spilled slot. This must be a multiple of 16. - // Also note that this is only known after register allocation. - spillSlotSize int64 - spillSlots map[regalloc.VRegID]int64 // regalloc.VRegID to offset. - // clobberedRegs holds real-register backed VRegs saved at the function prologue, and restored at the epilogue. 
- clobberedRegs []regalloc.VReg - - maxRequiredStackSizeForCalls int64 - stackBoundsCheckDisabled bool - - regAllocStarted bool - } - - addend32 struct { - r regalloc.VReg - ext extendOp - } - - condBrReloc struct { - cbr *instruction - // currentLabelPos is the labelPosition within which condBr is defined. - currentLabelPos *labelPosition - // Next block's labelPosition. - nextLabel label - offset int64 - } -) - -type ( - // label represents a position in the generated code which is either - // a real instruction or the constant InstructionPool (e.g. jump tables). - // - // This is exactly the same as the traditional "label" in assembly code. - label uint32 - - // labelPosition represents the regions of the generated code which the label represents. - // This implements regalloc.Block. - labelPosition struct { - // sb is not nil if this corresponds to a ssa.BasicBlock. - sb ssa.BasicBlock - // cur is used to walk through the instructions in the block during the register allocation. - cur, - // begin and end are the first and last instructions of the block. - begin, end *instruction - // binaryOffset is the offset in the binary where the label is located. - binaryOffset int64 - } -) - -const ( - labelReturn label = math.MaxUint32 - labelInvalid = labelReturn - 1 -) - -// String implements backend.Machine. -func (l label) String() string { - return fmt.Sprintf("L%d", l) -} - -func resetLabelPosition(l *labelPosition) { - *l = labelPosition{} -} - -// NewBackend returns a new backend for arm64. -func NewBackend() backend.Machine { - m := &machine{ - spillSlots: make(map[regalloc.VRegID]int64), - regAlloc: regalloc.NewAllocator[*instruction, *labelPosition, *regAllocFn](regInfo), - amodePool: wazevoapi.NewPool[addressMode](resetAddressMode), - instrPool: wazevoapi.NewPool[instruction](resetInstruction), - labelPositionPool: wazevoapi.NewIDedPool[labelPosition](resetLabelPosition), - } - m.regAllocFn.m = m - return m -} - -func ssaBlockLabel(sb ssa.BasicBlock) label { - if sb.ReturnBlock() { - return labelReturn - } - return label(sb.ID()) -} - -// getOrAllocateSSABlockLabelPosition returns the labelPosition for the given basic block. -func (m *machine) getOrAllocateSSABlockLabelPosition(sb ssa.BasicBlock) *labelPosition { - if sb.ReturnBlock() { - m.returnLabelPos.sb = sb - return &m.returnLabelPos - } - - l := ssaBlockLabel(sb) - pos := m.labelPositionPool.GetOrAllocate(int(l)) - pos.sb = sb - return pos -} - -// LinkAdjacentBlocks implements backend.Machine. -func (m *machine) LinkAdjacentBlocks(prev, next ssa.BasicBlock) { - prevPos, nextPos := m.getOrAllocateSSABlockLabelPosition(prev), m.getOrAllocateSSABlockLabelPosition(next) - prevPos.end.next = nextPos.begin -} - -// StartBlock implements backend.Machine. -func (m *machine) StartBlock(blk ssa.BasicBlock) { - m.currentLabelPos = m.getOrAllocateSSABlockLabelPosition(blk) - labelPos := m.currentLabelPos - end := m.allocateNop() - m.perBlockHead, m.perBlockEnd = end, end - labelPos.begin, labelPos.end = end, end - m.orderedSSABlockLabelPos = append(m.orderedSSABlockLabelPos, labelPos) -} - -// EndBlock implements ExecutableContext. -func (m *machine) EndBlock() { - // Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions. 
- m.insertAtPerBlockHead(m.allocateNop()) - - m.currentLabelPos.begin = m.perBlockHead - - if m.currentLabelPos.sb.EntryBlock() { - m.rootInstr = m.perBlockHead - } -} - -func (m *machine) insertAtPerBlockHead(i *instruction) { - if m.perBlockHead == nil { - m.perBlockHead = i - m.perBlockEnd = i - return - } - - i.next = m.perBlockHead - m.perBlockHead.prev = i - m.perBlockHead = i -} - -// FlushPendingInstructions implements backend.Machine. -func (m *machine) FlushPendingInstructions() { - l := len(m.pendingInstructions) - if l == 0 { - return - } - for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order. - m.insertAtPerBlockHead(m.pendingInstructions[i]) - } - m.pendingInstructions = m.pendingInstructions[:0] -} - -// RegAlloc implements backend.Machine Function. -func (m *machine) RegAlloc() { - m.regAllocStarted = true - m.regAlloc.DoAllocation(&m.regAllocFn) - // Now that we know the final spill slot size, we must align spillSlotSize to 16 bytes. - m.spillSlotSize = (m.spillSlotSize + 15) &^ 15 -} - -// Reset implements backend.Machine. -func (m *machine) Reset() { - m.clobberedRegs = m.clobberedRegs[:0] - for key := range m.spillSlots { - m.clobberedRegs = append(m.clobberedRegs, regalloc.VReg(key)) - } - for _, key := range m.clobberedRegs { - delete(m.spillSlots, regalloc.VRegID(key)) - } - m.clobberedRegs = m.clobberedRegs[:0] - m.regAllocStarted = false - m.regAlloc.Reset() - m.spillSlotSize = 0 - m.unresolvedAddressModes = m.unresolvedAddressModes[:0] - m.maxRequiredStackSizeForCalls = 0 - m.jmpTableTargetsNext = 0 - m.amodePool.Reset() - m.instrPool.Reset() - m.labelPositionPool.Reset() - m.pendingInstructions = m.pendingInstructions[:0] - m.perBlockHead, m.perBlockEnd, m.rootInstr = nil, nil, nil - m.orderedSSABlockLabelPos = m.orderedSSABlockLabelPos[:0] -} - -// StartLoweringFunction implements backend.Machine StartLoweringFunction. -func (m *machine) StartLoweringFunction(maxBlockID ssa.BasicBlockID) { - m.maxSSABlockID = label(maxBlockID) - m.nextLabel = label(maxBlockID) + 1 -} - -// SetCurrentABI implements backend.Machine SetCurrentABI. -func (m *machine) SetCurrentABI(abi *backend.FunctionABI) { - m.currentABI = abi -} - -// DisableStackCheck implements backend.Machine DisableStackCheck. -func (m *machine) DisableStackCheck() { - m.stackBoundsCheckDisabled = true -} - -// SetCompiler implements backend.Machine. -func (m *machine) SetCompiler(ctx backend.Compiler) { - m.compiler = ctx - m.regAllocFn.ssaB = ctx.SSABuilder() -} - -func (m *machine) insert(i *instruction) { - m.pendingInstructions = append(m.pendingInstructions, i) -} - -func (m *machine) insertBrTargetLabel() label { - nop, l := m.allocateBrTarget() - m.insert(nop) - return l -} - -func (m *machine) allocateBrTarget() (nop *instruction, l label) { - l = m.nextLabel - m.nextLabel++ - nop = m.allocateInstr() - nop.asNop0WithLabel(l) - pos := m.labelPositionPool.GetOrAllocate(int(l)) - pos.begin, pos.end = nop, nop - return -} - -// allocateInstr allocates an instruction. 
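// Illustrative sketch (not part of the deleted file): the bit trick RegAlloc
// above uses to round the final spill-slot size up to a multiple of 16, the
// alignment the AArch64 stack layout requires.
func alignUp16(n int64) int64 {
	// Adding 15 and clearing the low four bits rounds a non-negative n up to
	// the next multiple of 16.
	return (n + 15) &^ 15
}

// For example: alignUp16(0) == 0, alignUp16(8) == 16, alignUp16(16) == 16,
// alignUp16(17) == 32.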
-func (m *machine) allocateInstr() *instruction { - instr := m.instrPool.Allocate() - if !m.regAllocStarted { - instr.addedBeforeRegAlloc = true - } - return instr -} - -func resetInstruction(i *instruction) { - *i = instruction{} -} - -func (m *machine) allocateNop() *instruction { - instr := m.allocateInstr() - instr.asNop0() - return instr -} - -func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruction) { - amode := i.getAmode() - switch amode.kind { - case addressModeKindResultStackSpace: - amode.imm += ret0offset - case addressModeKindArgStackSpace: - amode.imm += arg0offset - default: - panic("BUG") - } - - var sizeInBits byte - switch i.kind { - case store8, uLoad8: - sizeInBits = 8 - case store16, uLoad16: - sizeInBits = 16 - case store32, fpuStore32, uLoad32, fpuLoad32: - sizeInBits = 32 - case store64, fpuStore64, uLoad64, fpuLoad64: - sizeInBits = 64 - case fpuStore128, fpuLoad128: - sizeInBits = 128 - default: - panic("BUG") - } - - if offsetFitsInAddressModeKindRegUnsignedImm12(sizeInBits, amode.imm) { - amode.kind = addressModeKindRegUnsignedImm12 - } else { - // This case, we load the offset into the temporary register, - // and then use it as the index register. - newPrev := m.lowerConstantI64AndInsert(i.prev, tmpRegVReg, amode.imm) - linkInstr(newPrev, i) - *amode = addressMode{kind: addressModeKindRegReg, rn: amode.rn, rm: tmpRegVReg, extOp: extendOpUXTX /* indicates rm reg is 64-bit */} - } -} - -// resolveRelativeAddresses resolves the relative addresses before encoding. -func (m *machine) resolveRelativeAddresses(ctx context.Context) { - for { - if len(m.unresolvedAddressModes) > 0 { - arg0offset, ret0offset := m.arg0OffsetFromSP(), m.ret0OffsetFromSP() - for _, i := range m.unresolvedAddressModes { - m.resolveAddressingMode(arg0offset, ret0offset, i) - } - } - - // Reuse the slice to gather the unresolved conditional branches. - m.condBrRelocs = m.condBrRelocs[:0] - - var fn string - var fnIndex int - var labelPosToLabel map[*labelPosition]label - if wazevoapi.PerfMapEnabled { - labelPosToLabel = make(map[*labelPosition]label) - for i := 0; i <= m.labelPositionPool.MaxIDEncountered(); i++ { - labelPosToLabel[m.labelPositionPool.Get(i)] = label(i) - } - - fn = wazevoapi.GetCurrentFunctionName(ctx) - fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx) - } - - // Next, in order to determine the offsets of relative jumps, we have to calculate the size of each label. - var offset int64 - for i, pos := range m.orderedSSABlockLabelPos { - pos.binaryOffset = offset - var size int64 - for cur := pos.begin; ; cur = cur.next { - switch cur.kind { - case nop0: - l := cur.nop0Label() - if pos := m.labelPositionPool.Get(int(l)); pos != nil { - pos.binaryOffset = offset + size - } - case condBr: - if !cur.condBrOffsetResolved() { - var nextLabel label - if i < len(m.orderedSSABlockLabelPos)-1 { - // Note: this is only used when the block ends with fallthrough, - // therefore can be safely assumed that the next block exists when it's needed. 
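// Illustrative sketch (not part of the deleted file): roughly the decision a
// helper like offsetFitsInAddressModeKindRegUnsignedImm12 (used by
// resolveAddressingMode above) makes. An AArch64 load/store with an unsigned
// immediate offset encodes the offset as a 12-bit field scaled by the access
// size, so the byte offset must be non-negative, a multiple of the access
// size, and small enough to fit once scaled; offsets that fail the check fall
// back to the register-plus-register addressing mode via the temporary
// register. The helper in the deleted file may differ in detail.
func fitsUnsignedScaledImm12(sizeInBits byte, offset int64) bool {
	sizeInBytes := int64(sizeInBits) / 8
	if offset < 0 || offset%sizeInBytes != 0 {
		return false
	}
	return offset/sizeInBytes < 4096 // 12-bit unsigned field
}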
- nextLabel = ssaBlockLabel(m.orderedSSABlockLabelPos[i+1].sb) - } - m.condBrRelocs = append(m.condBrRelocs, condBrReloc{ - cbr: cur, currentLabelPos: pos, offset: offset + size, - nextLabel: nextLabel, - }) - } - } - size += cur.size() - if cur == pos.end { - break - } - } - - if wazevoapi.PerfMapEnabled { - if size > 0 { - wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelPosToLabel[pos])) - } - } - offset += size - } - - // Before resolving any offsets, we need to check if all the conditional branches can be resolved. - var needRerun bool - for i := range m.condBrRelocs { - reloc := &m.condBrRelocs[i] - cbr := reloc.cbr - offset := reloc.offset - - target := cbr.condBrLabel() - offsetOfTarget := m.labelPositionPool.Get(int(target)).binaryOffset - diff := offsetOfTarget - offset - if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 { - // This case the conditional branch is too huge. We place the trampoline instructions at the end of the current block, - // and jump to it. - m.insertConditionalJumpTrampoline(cbr, reloc.currentLabelPos, reloc.nextLabel) - // Then, we need to recall this function to fix up the label offsets - // as they have changed after the trampoline is inserted. - needRerun = true - } - } - if needRerun { - if wazevoapi.PerfMapEnabled { - wazevoapi.PerfMap.Clear() - } - } else { - break - } - } - - var currentOffset int64 - for cur := m.rootInstr; cur != nil; cur = cur.next { - switch cur.kind { - case br: - target := cur.brLabel() - offsetOfTarget := m.labelPositionPool.Get(int(target)).binaryOffset - diff := offsetOfTarget - currentOffset - divided := diff >> 2 - if divided < minSignedInt26 || divided > maxSignedInt26 { - // This means the currently compiled single function is extremely large. - panic("too large function that requires branch relocation of large unconditional branch larger than 26-bit range") - } - cur.brOffsetResolve(diff) - case condBr: - if !cur.condBrOffsetResolved() { - target := cur.condBrLabel() - offsetOfTarget := m.labelPositionPool.Get(int(target)).binaryOffset - diff := offsetOfTarget - currentOffset - if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 { - panic("BUG: branch relocation for large conditional branch larger than 19-bit range must be handled properly") - } - cur.condBrOffsetResolve(diff) - } - case brTableSequence: - tableIndex := cur.u1 - targets := m.jmpTableTargets[tableIndex] - for i := range targets { - l := label(targets[i]) - offsetOfTarget := m.labelPositionPool.Get(int(l)).binaryOffset - diff := offsetOfTarget - (currentOffset + brTableSequenceOffsetTableBegin) - targets[i] = uint32(diff) - } - cur.brTableSequenceOffsetsResolved() - case emitSourceOffsetInfo: - m.compiler.AddSourceOffsetInfo(currentOffset, cur.sourceOffsetInfo()) - } - currentOffset += cur.size() - } -} - -const ( - maxSignedInt26 = 1<<25 - 1 - minSignedInt26 = -(1 << 25) - - maxSignedInt19 = 1<<18 - 1 - minSignedInt19 = -(1 << 18) -) - -func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *labelPosition, nextLabel label) { - cur := currentBlk.end - originalTarget := cbr.condBrLabel() - endNext := cur.next - - if cur.kind != br { - // If the current block ends with a conditional branch, we can just insert the trampoline after it. - // Otherwise, we need to insert "skip" instruction to skip the trampoline instructions. 
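// Illustrative sketch (not part of the deleted file): the displacement checks
// applied above. AArch64 encodes branch offsets in units of 4 bytes, so the
// byte difference is shifted right by two and compared against the signed
// 19-bit range for conditional branches and the signed 26-bit range for
// unconditional branches (the min/max constants defined above). A conditional
// branch that does not fit gets a trampoline; an unconditional branch that
// does not fit is treated as a bug.
func fitsCondBr19(byteDiff int64) bool {
	d := byteDiff >> 2
	return d >= -(1<<18) && d <= 1<<18-1
}

func fitsUncondBr26(byteDiff int64) bool {
	d := byteDiff >> 2
	return d >= -(1<<25) && d <= 1<<25-1
}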
- skip := m.allocateInstr() - skip.asBr(nextLabel) - cur = linkInstr(cur, skip) - } - - cbrNewTargetInstr, cbrNewTargetLabel := m.allocateBrTarget() - cbr.setCondBrTargets(cbrNewTargetLabel) - cur = linkInstr(cur, cbrNewTargetInstr) - - // Then insert the unconditional branch to the original, which should be possible to get encoded - // as 26-bit offset should be enough for any practical application. - br := m.allocateInstr() - br.asBr(originalTarget) - cur = linkInstr(cur, br) - - // Update the end of the current block. - currentBlk.end = cur - - linkInstr(cur, endNext) -} - -// Format implements backend.Machine. -func (m *machine) Format() string { - begins := map[*instruction]label{} - for l := label(0); l < m.nextLabel; l++ { - pos := m.labelPositionPool.Get(int(l)) - if pos != nil { - begins[pos.begin] = l - } - } - - var lines []string - for cur := m.rootInstr; cur != nil; cur = cur.next { - if l, ok := begins[cur]; ok { - var labelStr string - if l <= m.maxSSABlockID { - labelStr = fmt.Sprintf("%s (SSA Block: blk%d):", l, int(l)) - } else { - labelStr = fmt.Sprintf("%s:", l) - } - lines = append(lines, labelStr) - } - if cur.kind == nop0 { - continue - } - lines = append(lines, "\t"+cur.String()) - } - return "\n" + strings.Join(lines, "\n") + "\n" -} - -// InsertReturn implements backend.Machine. -func (m *machine) InsertReturn() { - i := m.allocateInstr() - i.asRet() - m.insert(i) -} - -func (m *machine) getVRegSpillSlotOffsetFromSP(id regalloc.VRegID, size byte) int64 { - offset, ok := m.spillSlots[id] - if !ok { - offset = m.spillSlotSize - // TODO: this should be aligned depending on the `size` to use Imm12 offset load/store as much as possible. - m.spillSlots[id] = offset - m.spillSlotSize += int64(size) - } - return offset + 16 // spill slot starts above the clobbered registers and the frame size. -} - -func (m *machine) clobberedRegSlotSize() int64 { - return int64(len(m.clobberedRegs) * 16) -} - -func (m *machine) arg0OffsetFromSP() int64 { - return m.frameSize() + - 16 + // 16-byte aligned return address - 16 // frame size saved below the clobbered registers. -} - -func (m *machine) ret0OffsetFromSP() int64 { - return m.arg0OffsetFromSP() + m.currentABI.ArgStackSize -} - -func (m *machine) requiredStackSize() int64 { - return m.maxRequiredStackSizeForCalls + - m.frameSize() + - 16 + // 16-byte aligned return address. - 16 // frame size saved below the clobbered registers. 
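// Illustrative sketch (not part of the deleted file): a toy recomputation of
// the SP-relative offsets defined by arg0OffsetFromSP, ret0OffsetFromSP and
// requiredStackSize above, with made-up sizes. Two clobbered registers
// (2*16 bytes) plus 32 bytes of spill slots give a 64-byte frame; the first
// stack-passed argument then sits 96 bytes above SP (frame + 16-byte
// return-address slot + 16-byte frame-size slot), and the first stack-passed
// result follows the 24-byte argument area.
func exampleFrameOffsets() (arg0, ret0 int64) {
	const (
		clobberedRegSlots = 2 * 16 // two saved registers, 16 bytes each
		spillSlotSize     = 32
		argStackSize      = 24 // hypothetical ABI value
	)
	frameSize := int64(clobberedRegSlots + spillSlotSize) // 64
	arg0 = frameSize + 16 + 16                            // 96
	ret0 = arg0 + argStackSize                            // 120
	return arg0, ret0
}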
-} - -func (m *machine) frameSize() int64 { - s := m.clobberedRegSlotSize() + m.spillSlotSize - if s&0xf != 0 { - panic(fmt.Errorf("BUG: frame size %d is not 16-byte aligned", s)) - } - return s -} - -func (m *machine) addJmpTableTarget(targets ssa.Values) (index int) { - if m.jmpTableTargetsNext == len(m.jmpTableTargets) { - m.jmpTableTargets = append(m.jmpTableTargets, make([]uint32, 0, len(targets.View()))) - } - - index = m.jmpTableTargetsNext - m.jmpTableTargetsNext++ - m.jmpTableTargets[index] = m.jmpTableTargets[index][:0] - for _, targetBlockID := range targets.View() { - target := m.compiler.SSABuilder().BasicBlock(ssa.BasicBlockID(targetBlockID)) - m.jmpTableTargets[index] = append(m.jmpTableTargets[index], uint32(target.ID())) - } - return -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go deleted file mode 100644 index c646a8fab..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go +++ /dev/null @@ -1,467 +0,0 @@ -package arm64 - -import ( - "fmt" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -// PostRegAlloc implements backend.Machine. -func (m *machine) PostRegAlloc() { - m.setupPrologue() - m.postRegAlloc() -} - -// setupPrologue initializes the prologue of the function. -func (m *machine) setupPrologue() { - cur := m.rootInstr - prevInitInst := cur.next - - // - // (high address) (high address) - // SP----> +-----------------+ +------------------+ <----+ - // | ....... | | ....... | | - // | ret Y | | ret Y | | - // | ....... | | ....... | | - // | ret 0 | | ret 0 | | - // | arg X | | arg X | | size_of_arg_ret. - // | ....... | ====> | ....... | | - // | arg 1 | | arg 1 | | - // | arg 0 | | arg 0 | <----+ - // |-----------------| | size_of_arg_ret | - // | return address | - // +------------------+ <---- SP - // (low address) (low address) - - // Saves the return address (lr) and the size_of_arg_ret below the SP. - // size_of_arg_ret is used for stack unwinding. - cur = m.createReturnAddrAndSizeOfArgRetSlot(cur) - - if !m.stackBoundsCheckDisabled { - cur = m.insertStackBoundsCheck(m.requiredStackSize(), cur) - } - - // Decrement SP if spillSlotSize > 0. - if m.spillSlotSize == 0 && len(m.spillSlots) != 0 { - panic(fmt.Sprintf("BUG: spillSlotSize=%d, spillSlots=%v\n", m.spillSlotSize, m.spillSlots)) - } - - if regs := m.clobberedRegs; len(regs) > 0 { - // - // (high address) (high address) - // +-----------------+ +-----------------+ - // | ....... | | ....... | - // | ret Y | | ret Y | - // | ....... | | ....... | - // | ret 0 | | ret 0 | - // | arg X | | arg X | - // | ....... | | ....... | - // | arg 1 | | arg 1 | - // | arg 0 | | arg 0 | - // | size_of_arg_ret | | size_of_arg_ret | - // | ReturnAddress | | ReturnAddress | - // SP----> +-----------------+ ====> +-----------------+ - // (low address) | clobbered M | - // | ............ | - // | clobbered 0 | - // +-----------------+ <----- SP - // (low address) - // - _amode := addressModePreOrPostIndex(m, spVReg, - -16, // stack pointer must be 16-byte aligned. - true, // Decrement before store. - ) - for _, vr := range regs { - // TODO: pair stores to reduce the number of instructions. 
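// Illustrative sketch (not part of the deleted file): the pre-indexed store
// used here for each clobbered register ("decrement before store", i.e.
// str Xn, [sp, #-16]! in assembly), modelled on a toy stack. The real code
// emits machine instructions rather than manipulating Go values, but the
// effect per register is the same: drop SP by 16 and store at the new SP.
type toyStack struct {
	sp  int64
	mem map[int64]uint64
}

func (s *toyStack) pushClobbered(val uint64) {
	s.sp -= 16        // keep the stack pointer 16-byte aligned
	s.mem[s.sp] = val // store the register value at the new SP
}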
- store := m.allocateInstr() - store.asStore(operandNR(vr), _amode, regTypeToRegisterSizeInBits(vr.RegType())) - cur = linkInstr(cur, store) - } - } - - if size := m.spillSlotSize; size > 0 { - // Check if size is 16-byte aligned. - if size&0xf != 0 { - panic(fmt.Errorf("BUG: spill slot size %d is not 16-byte aligned", size)) - } - - cur = m.addsAddOrSubStackPointer(cur, spVReg, size, false) - - // At this point, the stack looks like: - // - // (high address) - // +------------------+ - // | ....... | - // | ret Y | - // | ....... | - // | ret 0 | - // | arg X | - // | ....... | - // | arg 1 | - // | arg 0 | - // | size_of_arg_ret | - // | ReturnAddress | - // +------------------+ - // | clobbered M | - // | ............ | - // | clobbered 0 | - // | spill slot N | - // | ............ | - // | spill slot 2 | - // | spill slot 0 | - // SP----> +------------------+ - // (low address) - } - - // We push the frame size into the stack to make it possible to unwind stack: - // - // - // (high address) (high address) - // +-----------------+ +-----------------+ - // | ....... | | ....... | - // | ret Y | | ret Y | - // | ....... | | ....... | - // | ret 0 | | ret 0 | - // | arg X | | arg X | - // | ....... | | ....... | - // | arg 1 | | arg 1 | - // | arg 0 | | arg 0 | - // | size_of_arg_ret | | size_of_arg_ret | - // | ReturnAddress | | ReturnAddress | - // +-----------------+ ==> +-----------------+ <----+ - // | clobbered M | | clobbered M | | - // | ............ | | ............ | | - // | clobbered 2 | | clobbered 2 | | - // | clobbered 1 | | clobbered 1 | | frame size - // | clobbered 0 | | clobbered 0 | | - // | spill slot N | | spill slot N | | - // | ............ | | ............ | | - // | spill slot 0 | | spill slot 0 | <----+ - // SP---> +-----------------+ | xxxxxx | ;; unused space to make it 16-byte aligned. - // | frame_size | - // +-----------------+ <---- SP - // (low address) - // - cur = m.createFrameSizeSlot(cur, m.frameSize()) - - linkInstr(cur, prevInitInst) -} - -func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruction { - // First we decrement the stack pointer to point the arg0 slot. - var sizeOfArgRetReg regalloc.VReg - s := int64(m.currentABI.AlignedArgResultStackSlotSize()) - if s > 0 { - cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, s) - sizeOfArgRetReg = tmpRegVReg - - subSp := m.allocateInstr() - subSp.asALU(aluOpSub, spVReg, operandNR(spVReg), operandNR(sizeOfArgRetReg), true) - cur = linkInstr(cur, subSp) - } else { - sizeOfArgRetReg = xzrVReg - } - - // Saves the return address (lr) and the size_of_arg_ret below the SP. - // size_of_arg_ret is used for stack unwinding. - pstr := m.allocateInstr() - amode := addressModePreOrPostIndex(m, spVReg, -16, true /* decrement before store */) - pstr.asStorePair64(lrVReg, sizeOfArgRetReg, amode) - cur = linkInstr(cur, pstr) - return cur -} - -func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction { - var frameSizeReg regalloc.VReg - if s > 0 { - cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, s) - frameSizeReg = tmpRegVReg - } else { - frameSizeReg = xzrVReg - } - _amode := addressModePreOrPostIndex(m, spVReg, - -16, // stack pointer must be 16-byte aligned. - true, // Decrement before store. - ) - store := m.allocateInstr() - store.asStore(operandNR(frameSizeReg), _amode, 64) - cur = linkInstr(cur, store) - return cur -} - -// postRegAlloc does multiple things while walking through the instructions: -// 1. Removes the redundant copy instruction. -// 2. 
Inserts the epilogue. -func (m *machine) postRegAlloc() { - for cur := m.rootInstr; cur != nil; cur = cur.next { - switch cur.kind { - case ret: - m.setupEpilogueAfter(cur.prev) - case loadConstBlockArg: - lc := cur - next := lc.next - m.pendingInstructions = m.pendingInstructions[:0] - m.lowerLoadConstantBlockArgAfterRegAlloc(lc) - for _, instr := range m.pendingInstructions { - cur = linkInstr(cur, instr) - } - linkInstr(cur, next) - m.pendingInstructions = m.pendingInstructions[:0] - default: - // Removes the redundant copy instruction. - if cur.IsCopy() && cur.rn.realReg() == cur.rd.RealReg() { - prev, next := cur.prev, cur.next - // Remove the copy instruction. - prev.next = next - if next != nil { - next.prev = prev - } - } - } - } -} - -func (m *machine) setupEpilogueAfter(cur *instruction) { - prevNext := cur.next - - // We've stored the frame size in the prologue, and now that we are about to return from this function, we won't need it anymore. - cur = m.addsAddOrSubStackPointer(cur, spVReg, 16, true) - - if s := m.spillSlotSize; s > 0 { - // Adjust SP to the original value: - // - // (high address) (high address) - // +-----------------+ +-----------------+ - // | ....... | | ....... | - // | ret Y | | ret Y | - // | ....... | | ....... | - // | ret 0 | | ret 0 | - // | arg X | | arg X | - // | ....... | | ....... | - // | arg 1 | | arg 1 | - // | arg 0 | | arg 0 | - // | xxxxx | | xxxxx | - // | ReturnAddress | | ReturnAddress | - // +-----------------+ ====> +-----------------+ - // | clobbered M | | clobbered M | - // | ............ | | ............ | - // | clobbered 1 | | clobbered 1 | - // | clobbered 0 | | clobbered 0 | - // | spill slot N | +-----------------+ <---- SP - // | ............ | - // | spill slot 0 | - // SP---> +-----------------+ - // (low address) - // - cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true) - } - - // First we need to restore the clobbered registers. - if len(m.clobberedRegs) > 0 { - // (high address) - // +-----------------+ +-----------------+ - // | ....... | | ....... | - // | ret Y | | ret Y | - // | ....... | | ....... | - // | ret 0 | | ret 0 | - // | arg X | | arg X | - // | ....... | | ....... | - // | arg 1 | | arg 1 | - // | arg 0 | | arg 0 | - // | xxxxx | | xxxxx | - // | ReturnAddress | | ReturnAddress | - // +-----------------+ ========> +-----------------+ <---- SP - // | clobbered M | - // | ........... | - // | clobbered 1 | - // | clobbered 0 | - // SP---> +-----------------+ - // (low address) - - l := len(m.clobberedRegs) - 1 - for i := range m.clobberedRegs { - vr := m.clobberedRegs[l-i] // reverse order to restore. - load := m.allocateInstr() - amode := addressModePreOrPostIndex(m, spVReg, - 16, // stack pointer must be 16-byte aligned. - false, // Increment after store. - ) - // TODO: pair loads to reduce the number of instructions. - switch regTypeToRegisterSizeInBits(vr.RegType()) { - case 64: // save int reg. - load.asULoad(vr, amode, 64) - case 128: // save vector reg. - load.asFpuLoad(vr, amode, 128) - } - cur = linkInstr(cur, load) - } - } - - // Reload the return address (lr). - // - // +-----------------+ +-----------------+ - // | ....... | | ....... | - // | ret Y | | ret Y | - // | ....... | | ....... | - // | ret 0 | | ret 0 | - // | arg X | | arg X | - // | ....... | ===> | ....... 
| - // | arg 1 | | arg 1 | - // | arg 0 | | arg 0 | - // | xxxxx | +-----------------+ <---- SP - // | ReturnAddress | - // SP----> +-----------------+ - - ldr := m.allocateInstr() - ldr.asULoad(lrVReg, - addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64) - cur = linkInstr(cur, ldr) - - if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 { - cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true) - } - - linkInstr(cur, prevNext) -} - -// saveRequiredRegs is the set of registers that must be saved/restored during growing stack when there's insufficient -// stack space left. Basically this is the combination of CalleeSavedRegisters plus argument registers execpt for x0, -// which always points to the execution context whenever the native code is entered from Go. -var saveRequiredRegs = []regalloc.VReg{ - x1VReg, x2VReg, x3VReg, x4VReg, x5VReg, x6VReg, x7VReg, - x19VReg, x20VReg, x21VReg, x22VReg, x23VReg, x24VReg, x25VReg, x26VReg, x28VReg, lrVReg, - v0VReg, v1VReg, v2VReg, v3VReg, v4VReg, v5VReg, v6VReg, v7VReg, - v18VReg, v19VReg, v20VReg, v21VReg, v22VReg, v23VReg, v24VReg, v25VReg, v26VReg, v27VReg, v28VReg, v29VReg, v30VReg, v31VReg, -} - -// insertStackBoundsCheck will insert the instructions after `cur` to check the -// stack bounds, and if there's no sufficient spaces required for the function, -// exit the execution and try growing it in Go world. -// -// TODO: we should be able to share the instructions across all the functions to reduce the size of compiled executable. -func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instruction) *instruction { - if requiredStackSize%16 != 0 { - panic("BUG") - } - - if immm12op, ok := asImm12Operand(uint64(requiredStackSize)); ok { - // sub tmp, sp, #requiredStackSize - sub := m.allocateInstr() - sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), immm12op, true) - cur = linkInstr(cur, sub) - } else { - // This case, we first load the requiredStackSize into the temporary register, - cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize) - // Then subtract it. - sub := m.allocateInstr() - sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), operandNR(tmpRegVReg), true) - cur = linkInstr(cur, sub) - } - - tmp2 := x11VReg // Caller save, so it is safe to use it here in the prologue. - - // ldr tmp2, [executionContext #StackBottomPtr] - ldr := m.allocateInstr() - amode := m.amodePool.Allocate() - *amode = addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: x0VReg, // execution context is always the first argument. - imm: wazevoapi.ExecutionContextOffsetStackBottomPtr.I64(), - } - ldr.asULoad(tmp2, amode, 64) - cur = linkInstr(cur, ldr) - - // subs xzr, tmp, tmp2 - subs := m.allocateInstr() - subs.asALU(aluOpSubS, xzrVReg, operandNR(tmpRegVReg), operandNR(tmp2), true) - cur = linkInstr(cur, subs) - - // b.ge #imm - cbr := m.allocateInstr() - cbr.asCondBr(ge.asCond(), labelInvalid, false /* ignored */) - cur = linkInstr(cur, cbr) - - // Set the required stack size and set it to the exec context. - { - // First load the requiredStackSize into the temporary register, - cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize) - setRequiredStackSize := m.allocateInstr() - amode := m.amodePool.Allocate() - *amode = addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. 
- rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(), - } - setRequiredStackSize.asStore(operandNR(tmpRegVReg), amode, 64) - - cur = linkInstr(cur, setRequiredStackSize) - } - - ldrAddress := m.allocateInstr() - amode2 := m.amodePool.Allocate() - *amode2 = addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: x0VReg, // execution context is always the first argument - imm: wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.I64(), - } - ldrAddress.asULoad(tmpRegVReg, amode2, 64) - cur = linkInstr(cur, ldrAddress) - - // Then jumps to the stack grow call sequence's address, meaning - // transferring the control to the code compiled by CompileStackGrowCallSequence. - bl := m.allocateInstr() - bl.asCallIndirect(tmpRegVReg, nil) - cur = linkInstr(cur, bl) - - // Now that we know the entire code, we can finalize how many bytes - // we have to skip when the stack size is sufficient. - var cbrOffset int64 - for _cur := cbr; ; _cur = _cur.next { - cbrOffset += _cur.size() - if _cur == cur { - break - } - } - cbr.condBrOffsetResolve(cbrOffset) - return cur -} - -// CompileStackGrowCallSequence implements backend.Machine. -func (m *machine) CompileStackGrowCallSequence() []byte { - cur := m.allocateInstr() - cur.asNop0() - m.rootInstr = cur - - // Save the callee saved and argument registers. - cur = m.saveRegistersInExecutionContext(cur, saveRequiredRegs) - - // Save the current stack pointer. - cur = m.saveCurrentStackPointer(cur, x0VReg) - - // Set the exit status on the execution context. - cur = m.setExitCode(cur, x0VReg, wazevoapi.ExitCodeGrowStack) - - // Exit the execution. - cur = m.storeReturnAddressAndExit(cur) - - // After the exit, restore the saved registers. - cur = m.restoreRegistersInExecutionContext(cur, saveRequiredRegs) - - // Then goes back the original address of this stack grow call. - ret := m.allocateInstr() - ret.asRet() - linkInstr(cur, ret) - - m.encode(m.rootInstr) - return m.compiler.Buf() -} - -func (m *machine) addsAddOrSubStackPointer(cur *instruction, rd regalloc.VReg, diff int64, add bool) *instruction { - m.pendingInstructions = m.pendingInstructions[:0] - m.insertAddOrSubStackPointer(rd, diff, add) - for _, inserted := range m.pendingInstructions { - cur = linkInstr(cur, inserted) - } - return cur -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go deleted file mode 100644 index f2ed53ae5..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go +++ /dev/null @@ -1,351 +0,0 @@ -package arm64 - -// This file implements the interfaces required for register allocations. See backend.RegAllocFunctionMachine. - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -// regAllocFn implements regalloc.Function. -type regAllocFn struct { - ssaB ssa.Builder - m *machine - loopNestingForestRoots []ssa.BasicBlock - blockIter int -} - -// PostOrderBlockIteratorBegin implements regalloc.Function. -func (f *regAllocFn) PostOrderBlockIteratorBegin() *labelPosition { - f.blockIter = len(f.m.orderedSSABlockLabelPos) - 1 - return f.PostOrderBlockIteratorNext() -} - -// PostOrderBlockIteratorNext implements regalloc.Function. 
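// Illustrative sketch (not part of the deleted file): the condition that
// insertStackBoundsCheck above lowers into machine code, written out as plain
// Go. growStack stands in for the trampoline produced by
// CompileStackGrowCallSequence, and setRequired for the store into the
// execution context's "stack grow required size" field; both are placeholder
// callbacks, not real APIs.
func checkStackBounds(sp, stackBottom, required int64, setRequired func(int64), growStack func()) {
	if sp-required >= stackBottom {
		return // enough stack left; the real code falls through via b.ge
	}
	setRequired(required) // tell the Go side how much stack is needed
	growStack()           // transfer control to the stack-grow call sequence
}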
-func (f *regAllocFn) PostOrderBlockIteratorNext() *labelPosition { - if f.blockIter < 0 { - return nil - } - b := f.m.orderedSSABlockLabelPos[f.blockIter] - f.blockIter-- - return b -} - -// ReversePostOrderBlockIteratorBegin implements regalloc.Function. -func (f *regAllocFn) ReversePostOrderBlockIteratorBegin() *labelPosition { - f.blockIter = 0 - return f.ReversePostOrderBlockIteratorNext() -} - -// ReversePostOrderBlockIteratorNext implements regalloc.Function. -func (f *regAllocFn) ReversePostOrderBlockIteratorNext() *labelPosition { - if f.blockIter >= len(f.m.orderedSSABlockLabelPos) { - return nil - } - b := f.m.orderedSSABlockLabelPos[f.blockIter] - f.blockIter++ - return b -} - -// ClobberedRegisters implements regalloc.Function. -func (f *regAllocFn) ClobberedRegisters(regs []regalloc.VReg) { - f.m.clobberedRegs = append(f.m.clobberedRegs[:0], regs...) -} - -// LoopNestingForestRoots implements regalloc.Function. -func (f *regAllocFn) LoopNestingForestRoots() int { - f.loopNestingForestRoots = f.ssaB.LoopNestingForestRoots() - return len(f.loopNestingForestRoots) -} - -// LoopNestingForestRoot implements regalloc.Function. -func (f *regAllocFn) LoopNestingForestRoot(i int) *labelPosition { - root := f.loopNestingForestRoots[i] - pos := f.m.getOrAllocateSSABlockLabelPosition(root) - return pos -} - -// LowestCommonAncestor implements regalloc.Function. -func (f *regAllocFn) LowestCommonAncestor(blk1, blk2 *labelPosition) *labelPosition { - sb := f.ssaB.LowestCommonAncestor(blk1.sb, blk2.sb) - pos := f.m.getOrAllocateSSABlockLabelPosition(sb) - return pos -} - -// Idom implements regalloc.Function. -func (f *regAllocFn) Idom(blk *labelPosition) *labelPosition { - sb := f.ssaB.Idom(blk.sb) - pos := f.m.getOrAllocateSSABlockLabelPosition(sb) - return pos -} - -// SwapBefore implements regalloc.Function. -func (f *regAllocFn) SwapBefore(x1, x2, tmp regalloc.VReg, instr *instruction) { - f.m.swap(instr.prev, x1, x2, tmp) -} - -// StoreRegisterBefore implements regalloc.Function. -func (f *regAllocFn) StoreRegisterBefore(v regalloc.VReg, instr *instruction) { - m := f.m - m.insertStoreRegisterAt(v, instr, false) -} - -// StoreRegisterAfter implements regalloc.Function. -func (f *regAllocFn) StoreRegisterAfter(v regalloc.VReg, instr *instruction) { - m := f.m - m.insertStoreRegisterAt(v, instr, true) -} - -// ReloadRegisterBefore implements regalloc.Function. -func (f *regAllocFn) ReloadRegisterBefore(v regalloc.VReg, instr *instruction) { - m := f.m - m.insertReloadRegisterAt(v, instr, false) -} - -// ReloadRegisterAfter implements regalloc.Function. -func (f *regAllocFn) ReloadRegisterAfter(v regalloc.VReg, instr *instruction) { - m := f.m - m.insertReloadRegisterAt(v, instr, true) -} - -// InsertMoveBefore implements regalloc.Function. -func (f *regAllocFn) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { - f.m.insertMoveBefore(dst, src, instr) -} - -// LoopNestingForestChild implements regalloc.Function. -func (f *regAllocFn) LoopNestingForestChild(pos *labelPosition, i int) *labelPosition { - childSB := pos.sb.LoopNestingForestChildren()[i] - return f.m.getOrAllocateSSABlockLabelPosition(childSB) -} - -// Succ implements regalloc.Block. -func (f *regAllocFn) Succ(pos *labelPosition, i int) *labelPosition { - succSB := pos.sb.Succ(i) - if succSB.ReturnBlock() { - return nil - } - return f.m.getOrAllocateSSABlockLabelPosition(succSB) -} - -// Pred implements regalloc.Block. 
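// Illustrative sketch (not part of the deleted file): how the iterator pair
// above is intended to be consumed by the register allocator. A nil
// *labelPosition marks the end of the walk, mirroring the bounds checks in
// the iterator methods.
func walkPostOrder(f *regAllocFn, visit func(*labelPosition)) {
	for pos := f.PostOrderBlockIteratorBegin(); pos != nil; pos = f.PostOrderBlockIteratorNext() {
		visit(pos)
	}
}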
-func (f *regAllocFn) Pred(pos *labelPosition, i int) *labelPosition { - predSB := pos.sb.Pred(i) - return f.m.getOrAllocateSSABlockLabelPosition(predSB) -} - -// BlockParams implements regalloc.Function. -func (f *regAllocFn) BlockParams(pos *labelPosition, regs *[]regalloc.VReg) []regalloc.VReg { - c := f.m.compiler - *regs = (*regs)[:0] - for i := 0; i < pos.sb.Params(); i++ { - v := c.VRegOf(pos.sb.Param(i)) - *regs = append(*regs, v) - } - return *regs -} - -// ID implements regalloc.Block. -func (pos *labelPosition) ID() int32 { - return int32(pos.sb.ID()) -} - -// InstrIteratorBegin implements regalloc.Block. -func (pos *labelPosition) InstrIteratorBegin() *instruction { - ret := pos.begin - pos.cur = ret - return ret -} - -// InstrIteratorNext implements regalloc.Block. -func (pos *labelPosition) InstrIteratorNext() *instruction { - for { - if pos.cur == pos.end { - return nil - } - instr := pos.cur.next - pos.cur = instr - if instr == nil { - return nil - } else if instr.addedBeforeRegAlloc { - // Only concerned about the instruction added before regalloc. - return instr - } - } -} - -// InstrRevIteratorBegin implements regalloc.Block. -func (pos *labelPosition) InstrRevIteratorBegin() *instruction { - pos.cur = pos.end - return pos.cur -} - -// InstrRevIteratorNext implements regalloc.Block. -func (pos *labelPosition) InstrRevIteratorNext() *instruction { - for { - if pos.cur == pos.begin { - return nil - } - instr := pos.cur.prev - pos.cur = instr - if instr == nil { - return nil - } else if instr.addedBeforeRegAlloc { - // Only concerned about the instruction added before regalloc. - return instr - } - } -} - -// FirstInstr implements regalloc.Block. -func (pos *labelPosition) FirstInstr() *instruction { return pos.begin } - -// LastInstrForInsertion implements regalloc.Block. -func (pos *labelPosition) LastInstrForInsertion() *instruction { - return lastInstrForInsertion(pos.begin, pos.end) -} - -// Preds implements regalloc.Block. -func (pos *labelPosition) Preds() int { return pos.sb.Preds() } - -// Entry implements regalloc.Block. -func (pos *labelPosition) Entry() bool { return pos.sb.EntryBlock() } - -// Succs implements regalloc.Block. -func (pos *labelPosition) Succs() int { return pos.sb.Succs() } - -// LoopHeader implements regalloc.Block. -func (pos *labelPosition) LoopHeader() bool { return pos.sb.LoopHeader() } - -// LoopNestingForestChildren implements regalloc.Block. -func (pos *labelPosition) LoopNestingForestChildren() int { - return len(pos.sb.LoopNestingForestChildren()) -} - -func (m *machine) swap(cur *instruction, x1, x2, tmp regalloc.VReg) { - prevNext := cur.next - var mov1, mov2, mov3 *instruction - if x1.RegType() == regalloc.RegTypeInt { - if !tmp.Valid() { - tmp = tmpRegVReg - } - mov1 = m.allocateInstr().asMove64(tmp, x1) - mov2 = m.allocateInstr().asMove64(x1, x2) - mov3 = m.allocateInstr().asMove64(x2, tmp) - cur = linkInstr(cur, mov1) - cur = linkInstr(cur, mov2) - cur = linkInstr(cur, mov3) - linkInstr(cur, prevNext) - } else { - if !tmp.Valid() { - r2 := x2.RealReg() - // Temporarily spill x1 to stack. - cur = m.insertStoreRegisterAt(x1, cur, true).prev - // Then move x2 to x1. - cur = linkInstr(cur, m.allocateInstr().asFpuMov128(x1, x2)) - linkInstr(cur, prevNext) - // Then reload the original value on x1 from stack to r2. 
- m.insertReloadRegisterAt(x1.SetRealReg(r2), cur, true) - } else { - mov1 = m.allocateInstr().asFpuMov128(tmp, x1) - mov2 = m.allocateInstr().asFpuMov128(x1, x2) - mov3 = m.allocateInstr().asFpuMov128(x2, tmp) - cur = linkInstr(cur, mov1) - cur = linkInstr(cur, mov2) - cur = linkInstr(cur, mov3) - linkInstr(cur, prevNext) - } - } -} - -func (m *machine) insertMoveBefore(dst, src regalloc.VReg, instr *instruction) { - typ := src.RegType() - if typ != dst.RegType() { - panic("BUG: src and dst must have the same type") - } - - mov := m.allocateInstr() - if typ == regalloc.RegTypeInt { - mov.asMove64(dst, src) - } else { - mov.asFpuMov128(dst, src) - } - - cur := instr.prev - prevNext := cur.next - cur = linkInstr(cur, mov) - linkInstr(cur, prevNext) -} - -func (m *machine) insertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { - if !v.IsRealReg() { - panic("BUG: VReg must be backed by real reg to be stored") - } - - typ := m.compiler.TypeOf(v) - - var prevNext, cur *instruction - if after { - cur, prevNext = instr, instr.next - } else { - cur, prevNext = instr.prev, instr - } - - offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size()) - var amode *addressMode - cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true) - store := m.allocateInstr() - store.asStore(operandNR(v), amode, typ.Bits()) - - cur = linkInstr(cur, store) - return linkInstr(cur, prevNext) -} - -func (m *machine) insertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { - if !v.IsRealReg() { - panic("BUG: VReg must be backed by real reg to be stored") - } - - typ := m.compiler.TypeOf(v) - - var prevNext, cur *instruction - if after { - cur, prevNext = instr, instr.next - } else { - cur, prevNext = instr.prev, instr - } - - offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size()) - var amode *addressMode - cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true) - load := m.allocateInstr() - switch typ { - case ssa.TypeI32, ssa.TypeI64: - load.asULoad(v, amode, typ.Bits()) - case ssa.TypeF32, ssa.TypeF64: - load.asFpuLoad(v, amode, typ.Bits()) - case ssa.TypeV128: - load.asFpuLoad(v, amode, 128) - default: - panic("TODO") - } - - cur = linkInstr(cur, load) - return linkInstr(cur, prevNext) -} - -func lastInstrForInsertion(begin, end *instruction) *instruction { - cur := end - for cur.kind == nop0 { - cur = cur.prev - if cur == begin { - return end - } - } - switch cur.kind { - case br: - return cur - default: - return end - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go deleted file mode 100644 index 932fe842b..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go +++ /dev/null @@ -1,122 +0,0 @@ -package arm64 - -import ( - "encoding/binary" - "fmt" - "math" - "sort" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" -) - -const ( - // trampolineCallSize is the size of the trampoline instruction sequence for each function in an island. - trampolineCallSize = 4*4 + 4 // Four instructions + 32-bit immediate. - - // Unconditional branch offset is encoded as divided by 4 in imm26. 
- // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en - - maxUnconditionalBranchOffset = maxSignedInt26 * 4 - minUnconditionalBranchOffset = minSignedInt26 * 4 - - // trampolineIslandInterval is the range of the trampoline island. - // Half of the range is used for the trampoline island, and the other half is used for the function. - trampolineIslandInterval = (maxUnconditionalBranchOffset - 1) / 2 - - // maxNumFunctions explicitly specifies the maximum number of functions that can be allowed in a single executable. - maxNumFunctions = trampolineIslandInterval >> 6 - - // maxFunctionExecutableSize is the maximum size of a function that can exist in a trampoline island. - // Conservatively set to 1/4 of the trampoline island interval. - maxFunctionExecutableSize = trampolineIslandInterval >> 2 -) - -// CallTrampolineIslandInfo implements backend.Machine CallTrampolineIslandInfo. -func (m *machine) CallTrampolineIslandInfo(numFunctions int) (interval, size int, err error) { - if numFunctions > maxNumFunctions { - return 0, 0, fmt.Errorf("too many functions: %d > %d", numFunctions, maxNumFunctions) - } - return trampolineIslandInterval, trampolineCallSize * numFunctions, nil -} - -// ResolveRelocations implements backend.Machine ResolveRelocations. -func (m *machine) ResolveRelocations( - refToBinaryOffset []int, - importedFns int, - executable []byte, - relocations []backend.RelocationInfo, - callTrampolineIslandOffsets []int, -) { - for _, islandOffset := range callTrampolineIslandOffsets { - encodeCallTrampolineIsland(refToBinaryOffset, importedFns, islandOffset, executable) - } - - for _, r := range relocations { - instrOffset := r.Offset - calleeFnOffset := refToBinaryOffset[r.FuncRef] - diff := int64(calleeFnOffset) - (instrOffset) - // Check if the diff is within the range of the branch instruction. - if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset { - // Find the near trampoline island from callTrampolineIslandOffsets. - islandOffset := searchTrampolineIsland(callTrampolineIslandOffsets, int(instrOffset)) - islandTargetOffset := islandOffset + trampolineCallSize*int(r.FuncRef) - diff = int64(islandTargetOffset) - (instrOffset) - if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset { - panic("BUG in trampoline placement") - } - } - binary.LittleEndian.PutUint32(executable[instrOffset:instrOffset+4], encodeUnconditionalBranch(true, diff)) - } -} - -// encodeCallTrampolineIsland encodes a trampoline island for the given functions. -// Each island consists of a trampoline instruction sequence for each function. -// Each trampoline instruction sequence consists of 4 instructions + 32-bit immediate. -func encodeCallTrampolineIsland(refToBinaryOffset []int, importedFns int, islandOffset int, executable []byte) { - // We skip the imported functions: they don't need trampolines - // and are not accounted for. - binaryOffsets := refToBinaryOffset[importedFns:] - - for i := 0; i < len(binaryOffsets); i++ { - trampolineOffset := islandOffset + trampolineCallSize*i - - fnOffset := binaryOffsets[i] - diff := fnOffset - (trampolineOffset + 16) - if diff > math.MaxInt32 || diff < math.MinInt32 { - // This case even amd64 can't handle. 4GB is too big. - panic("too big binary") - } - - // The tmpReg, tmpReg2 is safe to overwrite (in fact any caller-saved register is safe to use). 
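// Illustrative sketch (not part of the deleted file): the choice made for each
// relocated call above. If the signed byte distance to the callee fits the
// 26-bit, 4-byte-granular unconditional-branch range, the call branches
// directly; otherwise it targets the callee's fixed slot in the nearest
// trampoline island, each slot being trampolineCallSize bytes wide
// (4 instructions plus a 32-bit immediate, 20 bytes). funcSlot stands in for
// the function-reference index; all inputs are whatever the caller computed.
func exampleBranchTarget(callOffset, calleeOffset, islandOffset, funcSlot int64) int64 {
	const (
		slotSize      = 4*4 + 4         // trampolineCallSize
		maxByteOffset = (1<<25 - 1) * 4 // maxUnconditionalBranchOffset
		minByteOffset = -(1 << 25) * 4  // minUnconditionalBranchOffset
	)
	diff := calleeOffset - callOffset
	if diff >= minByteOffset && diff <= maxByteOffset {
		return calleeOffset // a direct branch reaches the callee
	}
	return islandOffset + slotSize*funcSlot // go via the trampoline island
}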
- tmpReg, tmpReg2 := regNumberInEncoding[tmpRegVReg.RealReg()], regNumberInEncoding[x11] - - // adr tmpReg, PC+16: load the address of #diff into tmpReg. - binary.LittleEndian.PutUint32(executable[trampolineOffset:], encodeAdr(tmpReg, 16)) - // ldrsw tmpReg2, [tmpReg]: Load #diff into tmpReg2. - binary.LittleEndian.PutUint32(executable[trampolineOffset+4:], - encodeLoadOrStore(sLoad32, tmpReg2, addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpRegVReg})) - // add tmpReg, tmpReg2, tmpReg: add #diff to the address of #diff, getting the absolute address of the function. - binary.LittleEndian.PutUint32(executable[trampolineOffset+8:], - encodeAluRRR(aluOpAdd, tmpReg, tmpReg, tmpReg2, true, false)) - // br tmpReg: branch to the function without overwriting the link register. - binary.LittleEndian.PutUint32(executable[trampolineOffset+12:], encodeUnconditionalBranchReg(tmpReg, false)) - // #diff - binary.LittleEndian.PutUint32(executable[trampolineOffset+16:], uint32(diff)) - } -} - -// searchTrampolineIsland finds the nearest trampoline island from callTrampolineIslandOffsets. -// Note that even if the offset is in the middle of two islands, it returns the latter one. -// That is ok because the island is always placed in the middle of the range. -// -// precondition: callTrampolineIslandOffsets is sorted in ascending order. -func searchTrampolineIsland(callTrampolineIslandOffsets []int, offset int) int { - l := len(callTrampolineIslandOffsets) - n := sort.Search(l, func(i int) bool { - return callTrampolineIslandOffsets[i] >= offset - }) - if n == l { - n = l - 1 - } - return callTrampolineIslandOffsets[n] -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/reg.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/reg.go deleted file mode 100644 index 45737516d..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/reg.go +++ /dev/null @@ -1,397 +0,0 @@ -package arm64 - -import ( - "fmt" - "strconv" - "strings" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" -) - -// Arm64-specific registers. -// -// See https://developer.arm.com/documentation/dui0801/a/Overview-of-AArch64-state/Predeclared-core-register-names-in-AArch64-state - -const ( - // General purpose registers. Note that we do not distinguish wn and xn registers - // because they are the same from the perspective of register allocator, and - // the size can be determined by the type of the instruction. - - x0 = regalloc.RealRegInvalid + 1 + iota - x1 - x2 - x3 - x4 - x5 - x6 - x7 - x8 - x9 - x10 - x11 - x12 - x13 - x14 - x15 - x16 - x17 - x18 - x19 - x20 - x21 - x22 - x23 - x24 - x25 - x26 - x27 - x28 - x29 - x30 - - // Vector registers. Note that we do not distinguish vn and dn, ... registers - // because they are the same from the perspective of register allocator, and - // the size can be determined by the type of the instruction. 
- - v0 - v1 - v2 - v3 - v4 - v5 - v6 - v7 - v8 - v9 - v10 - v11 - v12 - v13 - v14 - v15 - v16 - v17 - v18 - v19 - v20 - v21 - v22 - v23 - v24 - v25 - v26 - v27 - v28 - v29 - v30 - v31 - - // Special registers - - xzr - sp - lr = x30 - fp = x29 - tmp = x27 -) - -var ( - x0VReg = regalloc.FromRealReg(x0, regalloc.RegTypeInt) - x1VReg = regalloc.FromRealReg(x1, regalloc.RegTypeInt) - x2VReg = regalloc.FromRealReg(x2, regalloc.RegTypeInt) - x3VReg = regalloc.FromRealReg(x3, regalloc.RegTypeInt) - x4VReg = regalloc.FromRealReg(x4, regalloc.RegTypeInt) - x5VReg = regalloc.FromRealReg(x5, regalloc.RegTypeInt) - x6VReg = regalloc.FromRealReg(x6, regalloc.RegTypeInt) - x7VReg = regalloc.FromRealReg(x7, regalloc.RegTypeInt) - x8VReg = regalloc.FromRealReg(x8, regalloc.RegTypeInt) - x9VReg = regalloc.FromRealReg(x9, regalloc.RegTypeInt) - x10VReg = regalloc.FromRealReg(x10, regalloc.RegTypeInt) - x11VReg = regalloc.FromRealReg(x11, regalloc.RegTypeInt) - x12VReg = regalloc.FromRealReg(x12, regalloc.RegTypeInt) - x13VReg = regalloc.FromRealReg(x13, regalloc.RegTypeInt) - x14VReg = regalloc.FromRealReg(x14, regalloc.RegTypeInt) - x15VReg = regalloc.FromRealReg(x15, regalloc.RegTypeInt) - x16VReg = regalloc.FromRealReg(x16, regalloc.RegTypeInt) - x17VReg = regalloc.FromRealReg(x17, regalloc.RegTypeInt) - x18VReg = regalloc.FromRealReg(x18, regalloc.RegTypeInt) - x19VReg = regalloc.FromRealReg(x19, regalloc.RegTypeInt) - x20VReg = regalloc.FromRealReg(x20, regalloc.RegTypeInt) - x21VReg = regalloc.FromRealReg(x21, regalloc.RegTypeInt) - x22VReg = regalloc.FromRealReg(x22, regalloc.RegTypeInt) - x23VReg = regalloc.FromRealReg(x23, regalloc.RegTypeInt) - x24VReg = regalloc.FromRealReg(x24, regalloc.RegTypeInt) - x25VReg = regalloc.FromRealReg(x25, regalloc.RegTypeInt) - x26VReg = regalloc.FromRealReg(x26, regalloc.RegTypeInt) - x27VReg = regalloc.FromRealReg(x27, regalloc.RegTypeInt) - x28VReg = regalloc.FromRealReg(x28, regalloc.RegTypeInt) - x29VReg = regalloc.FromRealReg(x29, regalloc.RegTypeInt) - x30VReg = regalloc.FromRealReg(x30, regalloc.RegTypeInt) - v0VReg = regalloc.FromRealReg(v0, regalloc.RegTypeFloat) - v1VReg = regalloc.FromRealReg(v1, regalloc.RegTypeFloat) - v2VReg = regalloc.FromRealReg(v2, regalloc.RegTypeFloat) - v3VReg = regalloc.FromRealReg(v3, regalloc.RegTypeFloat) - v4VReg = regalloc.FromRealReg(v4, regalloc.RegTypeFloat) - v5VReg = regalloc.FromRealReg(v5, regalloc.RegTypeFloat) - v6VReg = regalloc.FromRealReg(v6, regalloc.RegTypeFloat) - v7VReg = regalloc.FromRealReg(v7, regalloc.RegTypeFloat) - v8VReg = regalloc.FromRealReg(v8, regalloc.RegTypeFloat) - v9VReg = regalloc.FromRealReg(v9, regalloc.RegTypeFloat) - v10VReg = regalloc.FromRealReg(v10, regalloc.RegTypeFloat) - v11VReg = regalloc.FromRealReg(v11, regalloc.RegTypeFloat) - v12VReg = regalloc.FromRealReg(v12, regalloc.RegTypeFloat) - v13VReg = regalloc.FromRealReg(v13, regalloc.RegTypeFloat) - v14VReg = regalloc.FromRealReg(v14, regalloc.RegTypeFloat) - v15VReg = regalloc.FromRealReg(v15, regalloc.RegTypeFloat) - v16VReg = regalloc.FromRealReg(v16, regalloc.RegTypeFloat) - v17VReg = regalloc.FromRealReg(v17, regalloc.RegTypeFloat) - v18VReg = regalloc.FromRealReg(v18, regalloc.RegTypeFloat) - v19VReg = regalloc.FromRealReg(v19, regalloc.RegTypeFloat) - v20VReg = regalloc.FromRealReg(v20, regalloc.RegTypeFloat) - v21VReg = regalloc.FromRealReg(v21, regalloc.RegTypeFloat) - v22VReg = regalloc.FromRealReg(v22, regalloc.RegTypeFloat) - v23VReg = regalloc.FromRealReg(v23, regalloc.RegTypeFloat) - v24VReg = 
regalloc.FromRealReg(v24, regalloc.RegTypeFloat) - v25VReg = regalloc.FromRealReg(v25, regalloc.RegTypeFloat) - v26VReg = regalloc.FromRealReg(v26, regalloc.RegTypeFloat) - v27VReg = regalloc.FromRealReg(v27, regalloc.RegTypeFloat) - // lr (link register) holds the return address at the function entry. - lrVReg = x30VReg - // tmpReg is used to perform spill/load on large stack offsets, and load large constants. - // Therefore, be cautious to use this register in the middle of the compilation, especially before the register allocation. - // This is the same as golang/go, but it's only described in the source code: - // https://github.com/golang/go/blob/18e17e2cb12837ea2c8582ecdb0cc780f49a1aac/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go#L59 - // https://github.com/golang/go/blob/18e17e2cb12837ea2c8582ecdb0cc780f49a1aac/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go#L13-L15 - tmpRegVReg = regalloc.FromRealReg(tmp, regalloc.RegTypeInt) - v28VReg = regalloc.FromRealReg(v28, regalloc.RegTypeFloat) - v29VReg = regalloc.FromRealReg(v29, regalloc.RegTypeFloat) - v30VReg = regalloc.FromRealReg(v30, regalloc.RegTypeFloat) - v31VReg = regalloc.FromRealReg(v31, regalloc.RegTypeFloat) - xzrVReg = regalloc.FromRealReg(xzr, regalloc.RegTypeInt) - spVReg = regalloc.FromRealReg(sp, regalloc.RegTypeInt) - fpVReg = regalloc.FromRealReg(fp, regalloc.RegTypeInt) -) - -var regNames = [...]string{ - x0: "x0", - x1: "x1", - x2: "x2", - x3: "x3", - x4: "x4", - x5: "x5", - x6: "x6", - x7: "x7", - x8: "x8", - x9: "x9", - x10: "x10", - x11: "x11", - x12: "x12", - x13: "x13", - x14: "x14", - x15: "x15", - x16: "x16", - x17: "x17", - x18: "x18", - x19: "x19", - x20: "x20", - x21: "x21", - x22: "x22", - x23: "x23", - x24: "x24", - x25: "x25", - x26: "x26", - x27: "x27", - x28: "x28", - x29: "x29", - x30: "x30", - xzr: "xzr", - sp: "sp", - v0: "v0", - v1: "v1", - v2: "v2", - v3: "v3", - v4: "v4", - v5: "v5", - v6: "v6", - v7: "v7", - v8: "v8", - v9: "v9", - v10: "v10", - v11: "v11", - v12: "v12", - v13: "v13", - v14: "v14", - v15: "v15", - v16: "v16", - v17: "v17", - v18: "v18", - v19: "v19", - v20: "v20", - v21: "v21", - v22: "v22", - v23: "v23", - v24: "v24", - v25: "v25", - v26: "v26", - v27: "v27", - v28: "v28", - v29: "v29", - v30: "v30", - v31: "v31", -} - -func formatVRegSized(r regalloc.VReg, size byte) (ret string) { - if r.IsRealReg() { - ret = regNames[r.RealReg()] - switch ret[0] { - case 'x': - switch size { - case 32: - ret = strings.Replace(ret, "x", "w", 1) - case 64: - default: - panic("BUG: invalid register size: " + strconv.Itoa(int(size))) - } - case 'v': - switch size { - case 32: - ret = strings.Replace(ret, "v", "s", 1) - case 64: - ret = strings.Replace(ret, "v", "d", 1) - case 128: - ret = strings.Replace(ret, "v", "q", 1) - default: - panic("BUG: invalid register size") - } - } - } else { - switch r.RegType() { - case regalloc.RegTypeInt: - switch size { - case 32: - ret = fmt.Sprintf("w%d?", r.ID()) - case 64: - ret = fmt.Sprintf("x%d?", r.ID()) - default: - panic("BUG: invalid register size: " + strconv.Itoa(int(size))) - } - case regalloc.RegTypeFloat: - switch size { - case 32: - ret = fmt.Sprintf("s%d?", r.ID()) - case 64: - ret = fmt.Sprintf("d%d?", r.ID()) - case 128: - ret = fmt.Sprintf("q%d?", r.ID()) - default: - panic("BUG: invalid register size") - } - default: - panic(fmt.Sprintf("BUG: invalid register type: %d for %s", r.RegType(), r)) - } - } - return -} - -func formatVRegWidthVec(r regalloc.VReg, width vecArrangement) (ret string) { - var id string - wspec := 
strings.ToLower(width.String()) - if r.IsRealReg() { - id = regNames[r.RealReg()][1:] - } else { - id = fmt.Sprintf("%d?", r.ID()) - } - ret = fmt.Sprintf("%s%s", wspec, id) - return -} - -func formatVRegVec(r regalloc.VReg, arr vecArrangement, index vecIndex) (ret string) { - id := fmt.Sprintf("v%d?", r.ID()) - if r.IsRealReg() { - id = regNames[r.RealReg()] - } - ret = fmt.Sprintf("%s.%s", id, strings.ToLower(arr.String())) - if index != vecIndexNone { - ret += fmt.Sprintf("[%d]", index) - } - return -} - -func regTypeToRegisterSizeInBits(r regalloc.RegType) byte { - switch r { - case regalloc.RegTypeInt: - return 64 - case regalloc.RegTypeFloat: - return 128 - default: - panic("BUG: invalid register type") - } -} - -var regNumberInEncoding = [...]uint32{ - x0: 0, - x1: 1, - x2: 2, - x3: 3, - x4: 4, - x5: 5, - x6: 6, - x7: 7, - x8: 8, - x9: 9, - x10: 10, - x11: 11, - x12: 12, - x13: 13, - x14: 14, - x15: 15, - x16: 16, - x17: 17, - x18: 18, - x19: 19, - x20: 20, - x21: 21, - x22: 22, - x23: 23, - x24: 24, - x25: 25, - x26: 26, - x27: 27, - x28: 28, - x29: 29, - x30: 30, - xzr: 31, - sp: 31, - v0: 0, - v1: 1, - v2: 2, - v3: 3, - v4: 4, - v5: 5, - v6: 6, - v7: 7, - v8: 8, - v9: 9, - v10: 10, - v11: 11, - v12: 12, - v13: 13, - v14: 14, - v15: 15, - v16: 16, - v17: 17, - v18: 18, - v19: 19, - v20: 20, - v21: 21, - v22: 22, - v23: 23, - v24: 24, - v25: 25, - v26: 26, - v27: 27, - v28: 28, - v29: 29, - v30: 30, - v31: 31, -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go deleted file mode 100644 index a72b86f6b..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go +++ /dev/null @@ -1,84 +0,0 @@ -package arm64 - -import ( - "encoding/binary" - "reflect" - "unsafe" - - "github.com/tetratelabs/wazero/internal/wasmdebug" -) - -// UnwindStack implements wazevo.unwindStack. -func UnwindStack(sp, _, top uintptr, returnAddresses []uintptr) []uintptr { - l := int(top - sp) - - var stackBuf []byte - { - //nolint:staticcheck - hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf)) - hdr.Data = sp - hdr.Len = l - hdr.Cap = l - } - - for i := uint64(0); i < uint64(l); { - // (high address) - // +-----------------+ - // | ....... | - // | ret Y | <----+ - // | ....... | | - // | ret 0 | | - // | arg X | | size_of_arg_ret - // | ....... | | - // | arg 1 | | - // | arg 0 | <----+ - // | size_of_arg_ret | - // | ReturnAddress | - // +-----------------+ <----+ - // | ........... | | - // | spill slot M | | - // | ............ | | - // | spill slot 2 | | - // | spill slot 1 | | frame size - // | spill slot 1 | | - // | clobbered N | | - // | ............ | | - // | clobbered 0 | <----+ - // | xxxxxx | ;; unused space to make it 16-byte aligned. - // | frame_size | - // +-----------------+ <---- SP - // (low address) - - frameSize := binary.LittleEndian.Uint64(stackBuf[i:]) - i += frameSize + - 16 // frame size + aligned space. - retAddr := binary.LittleEndian.Uint64(stackBuf[i:]) - i += 8 // ret addr. - sizeOfArgRet := binary.LittleEndian.Uint64(stackBuf[i:]) - i += 8 + sizeOfArgRet - returnAddresses = append(returnAddresses, uintptr(retAddr)) - if len(returnAddresses) == wasmdebug.MaxFrames { - break - } - } - return returnAddresses -} - -// GoCallStackView implements wazevo.goCallStackView. 
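// Illustrative sketch (not part of the deleted file): the index arithmetic of
// the unwinding loop above. Starting from a frame's lowest address, the record
// is laid out as
//
//	+0               frame_size          (8 bytes, padded to 16)
//	+16              frame contents      (frame_size bytes: spills and clobbered registers)
//	+16+frame_size   ReturnAddress       (8 bytes)
//	+24+frame_size   size_of_arg_ret     (8 bytes)
//	+32+frame_size   stack args/results  (size_of_arg_ret bytes)
//
// so each iteration advances by frame_size+size_of_arg_ret+32 bytes.
func exampleNextFrameOffset(cur, frameSize, sizeOfArgRet uint64) uint64 {
	return cur + 16 + frameSize + 8 + 8 + sizeOfArgRet
}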
-func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 { - // (high address) - // +-----------------+ <----+ - // | xxxxxxxxxxx | | ;; optional unused space to make it 16-byte aligned. - // ^ | arg[N]/ret[M] | | - // sliceSize | | ............ | | sliceSize - // | | arg[1]/ret[1] | | - // v | arg[0]/ret[0] | <----+ - // | sliceSize | - // | frame_size | - // +-----------------+ <---- stackPointerBeforeGoCall - // (low address) - ptr := unsafe.Pointer(stackPointerBeforeGoCall) - data := (*uint64)(unsafe.Add(ptr, 16)) // skips the (frame_size, sliceSize). - size := *(*uint64)(unsafe.Add(ptr, 8)) - return unsafe.Slice(data, size) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/machine.go deleted file mode 100644 index 3a29e7cd6..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/machine.go +++ /dev/null @@ -1,119 +0,0 @@ -package backend - -import ( - "context" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -type ( - // Machine is a backend for a specific ISA machine. - Machine interface { - // StartLoweringFunction is called when the compilation of the given function is started. - // The maxBlockID is the maximum ssa.BasicBlockID in the function. - StartLoweringFunction(maxBlockID ssa.BasicBlockID) - - // LinkAdjacentBlocks is called after finished lowering all blocks in order to create one single instruction list. - LinkAdjacentBlocks(prev, next ssa.BasicBlock) - - // StartBlock is called when the compilation of the given block is started. - // The order of this being called is the reverse post order of the ssa.BasicBlock(s) as we iterate with - // ssa.Builder BlockIteratorReversePostOrderBegin and BlockIteratorReversePostOrderEnd. - StartBlock(ssa.BasicBlock) - - // EndBlock is called when the compilation of the current block is finished. - EndBlock() - - // FlushPendingInstructions flushes the pending instructions to the buffer. - // This will be called after the lowering of each SSA Instruction. - FlushPendingInstructions() - - // DisableStackCheck disables the stack check for the current compilation for debugging/testing. - DisableStackCheck() - - // SetCurrentABI initializes the FunctionABI for the given signature. - SetCurrentABI(abi *FunctionABI) - - // SetCompiler sets the compilation context used for the lifetime of Machine. - // This is only called once per Machine, i.e. before the first compilation. - SetCompiler(Compiler) - - // LowerSingleBranch is called when the compilation of the given single branch is started. - LowerSingleBranch(b *ssa.Instruction) - - // LowerConditionalBranch is called when the compilation of the given conditional branch is started. - LowerConditionalBranch(b *ssa.Instruction) - - // LowerInstr is called for each instruction in the given block except for the ones marked as already lowered - // via Compiler.MarkLowered. The order is reverse, i.e. from the last instruction to the first one. - // - // Note: this can lower multiple instructions (which produce the inputs) at once whenever it's possible - // for optimization. - LowerInstr(*ssa.Instruction) - - // Reset resets the machine state for the next compilation. - Reset() - - // InsertMove inserts a move instruction from src to dst whose type is typ. 
- InsertMove(dst, src regalloc.VReg, typ ssa.Type) - - // InsertReturn inserts the return instruction to return from the current function. - InsertReturn() - - // InsertLoadConstantBlockArg inserts the instruction(s) to load the constant value into the given regalloc.VReg. - InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) - - // Format returns the string representation of the currently compiled machine code. - // This is only for testing purpose. - Format() string - - // RegAlloc does the register allocation after lowering. - RegAlloc() - - // PostRegAlloc does the post register allocation, e.g. setting up prologue/epilogue, redundant move elimination, etc. - PostRegAlloc() - - // ResolveRelocations resolves the relocations after emitting machine code. - // * refToBinaryOffset: the map from the function reference (ssa.FuncRef) to the executable offset. - // * importedFns: the max index of the imported functions at the beginning of refToBinaryOffset - // * executable: the binary to resolve the relocations. - // * relocations: the relocations to resolve. - // * callTrampolineIslandOffsets: the offsets of the trampoline islands in the executable. - ResolveRelocations( - refToBinaryOffset []int, - importedFns int, - executable []byte, - relocations []RelocationInfo, - callTrampolineIslandOffsets []int, - ) - - // Encode encodes the machine instructions to the Compiler. - Encode(ctx context.Context) error - - // CompileGoFunctionTrampoline compiles the trampoline function to call a Go function of the given exit code and signature. - CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte - - // CompileStackGrowCallSequence returns the sequence of instructions shared by all functions to - // call the stack grow builtin function. - CompileStackGrowCallSequence() []byte - - // CompileEntryPreamble returns the sequence of instructions shared by multiple functions to - // enter the function from Go. - CompileEntryPreamble(signature *ssa.Signature) []byte - - // LowerParams lowers the given parameters. - LowerParams(params []ssa.Value) - - // LowerReturns lowers the given returns. - LowerReturns(returns []ssa.Value) - - // ArgsResultsRegs returns the registers used for arguments and return values. - ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) - - // CallTrampolineIslandInfo returns the interval of the offset where the trampoline island is placed, and - // the size of the trampoline island. If islandSize is zero, the trampoline island is not used on this machine. - CallTrampolineIslandInfo(numFunctions int) (interval, islandSize int, err error) - } -) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go deleted file mode 100644 index 5d15bd9dc..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go +++ /dev/null @@ -1,124 +0,0 @@ -package regalloc - -import "fmt" - -// These interfaces are implemented by ISA-specific backends to abstract away the details, and allow the register -// allocators to work on any ISA. - -type ( - // Function is the top-level interface to do register allocation, which corresponds to a CFG containing - // Blocks(s). - // - // I is the type of the instruction, and B is the type of the basic block. 
- Function[I Instr, B Block[I]] interface { - // PostOrderBlockIteratorBegin returns the first block in the post-order traversal of the CFG. - // In other words, the last blocks in the CFG will be returned first. - PostOrderBlockIteratorBegin() B - // PostOrderBlockIteratorNext returns the next block in the post-order traversal of the CFG. - PostOrderBlockIteratorNext() B - // ReversePostOrderBlockIteratorBegin returns the first block in the reverse post-order traversal of the CFG. - // In other words, the first blocks in the CFG will be returned first. - ReversePostOrderBlockIteratorBegin() B - // ReversePostOrderBlockIteratorNext returns the next block in the reverse post-order traversal of the CFG. - ReversePostOrderBlockIteratorNext() B - // ClobberedRegisters tell the clobbered registers by this function. - ClobberedRegisters([]VReg) - // LoopNestingForestRoots returns the number of roots of the loop nesting forest in a function. - LoopNestingForestRoots() int - // LoopNestingForestRoot returns the i-th root of the loop nesting forest in a function. - LoopNestingForestRoot(i int) B - // LowestCommonAncestor returns the lowest common ancestor of two blocks in the dominator tree. - LowestCommonAncestor(blk1, blk2 B) B - // Idom returns the immediate dominator of the given block. - Idom(blk B) B - - // LoopNestingForestChild returns the i-th child of the block in the loop nesting forest. - LoopNestingForestChild(b B, i int) B - // Pred returns the i-th predecessor of the block in the CFG. - Pred(b B, i int) B - // Succ returns the i-th successor of the block in the CFG. - Succ(b B, i int) B - // BlockParams returns the virtual registers used as the parameters of this block. - BlockParams(B, *[]VReg) []VReg - - // Followings are for rewriting the function. - - // SwapBefore swaps the two virtual registers at the end of the given block. - SwapBefore(x1, x2, tmp VReg, instr I) - // StoreRegisterBefore inserts store instruction(s) before the given instruction for the given virtual register. - StoreRegisterBefore(v VReg, instr I) - // StoreRegisterAfter inserts store instruction(s) after the given instruction for the given virtual register. - StoreRegisterAfter(v VReg, instr I) - // ReloadRegisterBefore inserts reload instruction(s) before the given instruction for the given virtual register. - ReloadRegisterBefore(v VReg, instr I) - // ReloadRegisterAfter inserts reload instruction(s) after the given instruction for the given virtual register. - ReloadRegisterAfter(v VReg, instr I) - // InsertMoveBefore inserts move instruction(s) before the given instruction for the given virtual registers. - InsertMoveBefore(dst, src VReg, instr I) - } - - // Block is a basic block in the CFG of a function, and it consists of multiple instructions, and predecessor Block(s). - // Right now, this corresponds to a ssa.BasicBlock lowered to the machine level. - Block[I Instr] interface { - comparable - // ID returns the unique identifier of this block which is ordered in the reverse post-order traversal of the CFG. - ID() int32 - // InstrIteratorBegin returns the first instruction in this block. Instructions added after lowering must be skipped. - // Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr. - InstrIteratorBegin() I - // InstrIteratorNext returns the next instruction in this block. Instructions added after lowering must be skipped. 
- // Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr. - InstrIteratorNext() I - // InstrRevIteratorBegin is the same as InstrIteratorBegin, but in the reverse order. - InstrRevIteratorBegin() I - // InstrRevIteratorNext is the same as InstrIteratorNext, but in the reverse order. - InstrRevIteratorNext() I - // FirstInstr returns the fist instruction in this block where instructions will be inserted after it. - FirstInstr() I - // LastInstrForInsertion returns the last instruction in this block where instructions will be inserted before it. - // Such insertions only happen when we need to insert spill/reload instructions to adjust the merge edges. - // At the time of register allocation, all the critical edges are already split, so there is no need - // to worry about the case where branching instruction has multiple successors. - // Therefore, usually, it is the nop instruction, but if the block ends with an unconditional branching, then it returns - // the unconditional branch, not the nop. In other words it is either nop or unconditional branch. - LastInstrForInsertion() I - // Preds returns the number of predecessors of this block in the CFG. - Preds() int - // Entry returns true if the block is for the entry block. - Entry() bool - // Succs returns the number of successors of this block in the CFG. - Succs() int - // LoopHeader returns true if this block is a loop header. - LoopHeader() bool - // LoopNestingForestChildren returns the number of children of this block in the loop nesting forest. - LoopNestingForestChildren() int - } - - // Instr is an instruction in a block, abstracting away the underlying ISA. - Instr interface { - comparable - fmt.Stringer - // Defs returns the virtual registers defined by this instruction. - Defs(*[]VReg) []VReg - // Uses returns the virtual registers used by this instruction. - // Note: multiple returned []VReg will not be held at the same time, so it's safe to use the same slice for this. - Uses(*[]VReg) []VReg - // AssignUse assigns the RealReg-allocated virtual register used by this instruction at the given index. - AssignUse(index int, v VReg) - // AssignDef assigns a RealReg-allocated virtual register defined by this instruction. - // This only accepts one register because we don't allocate registers for multi-def instructions (i.e. call instruction) - AssignDef(VReg) - // IsCopy returns true if this instruction is a move instruction between two registers. - // If true, the instruction is of the form of dst = src, and if the src and dst do not interfere with each other, - // we could coalesce them, and hence the copy can be eliminated from the final code. - IsCopy() bool - // IsCall returns true if this instruction is a call instruction. The result is used to insert - // caller saved register spills and restores. - IsCall() bool - // IsIndirectCall returns true if this instruction is an indirect call instruction which calls a function pointer. - // The result is used to insert caller saved register spills and restores. - IsIndirectCall() bool - // IsReturn returns true if this instruction is a return instruction. 
- IsReturn() bool - } -) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/reg.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/reg.go deleted file mode 100644 index 46df807e6..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/reg.go +++ /dev/null @@ -1,123 +0,0 @@ -package regalloc - -import ( - "fmt" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -// VReg represents a register which is assigned to an SSA value. This is used to represent a register in the backend. -// A VReg may or may not be a physical register, and the info of physical register can be obtained by RealReg. -type VReg uint64 - -// VRegID is the lower 32bit of VReg, which is the pure identifier of VReg without RealReg info. -type VRegID uint32 - -// RealReg returns the RealReg of this VReg. -func (v VReg) RealReg() RealReg { - return RealReg(v >> 32) -} - -// IsRealReg returns true if this VReg is backed by a physical register. -func (v VReg) IsRealReg() bool { - return v.RealReg() != RealRegInvalid -} - -// FromRealReg returns a VReg from the given RealReg and RegType. -// This is used to represent a specific pre-colored register in the backend. -func FromRealReg(r RealReg, typ RegType) VReg { - rid := VRegID(r) - if rid > vRegIDReservedForRealNum { - panic(fmt.Sprintf("invalid real reg %d", r)) - } - return VReg(r).SetRealReg(r).SetRegType(typ) -} - -// SetRealReg sets the RealReg of this VReg and returns the updated VReg. -func (v VReg) SetRealReg(r RealReg) VReg { - return VReg(r)<<32 | (v & 0xff_00_ffffffff) -} - -// RegType returns the RegType of this VReg. -func (v VReg) RegType() RegType { - return RegType(v >> 40) -} - -// SetRegType sets the RegType of this VReg and returns the updated VReg. -func (v VReg) SetRegType(t RegType) VReg { - return VReg(t)<<40 | (v & 0x00_ff_ffffffff) -} - -// ID returns the VRegID of this VReg. -func (v VReg) ID() VRegID { - return VRegID(v & 0xffffffff) -} - -// Valid returns true if this VReg is Valid. -func (v VReg) Valid() bool { - return v.ID() != vRegIDInvalid && v.RegType() != RegTypeInvalid -} - -// RealReg represents a physical register. -type RealReg byte - -const RealRegInvalid RealReg = 0 - -const ( - vRegIDInvalid VRegID = 1 << 31 - VRegIDNonReservedBegin = vRegIDReservedForRealNum - vRegIDReservedForRealNum VRegID = 128 - VRegInvalid = VReg(vRegIDInvalid) -) - -// String implements fmt.Stringer. -func (r RealReg) String() string { - switch r { - case RealRegInvalid: - return "invalid" - default: - return fmt.Sprintf("r%d", r) - } -} - -// String implements fmt.Stringer. -func (v VReg) String() string { - if v.IsRealReg() { - return fmt.Sprintf("r%d", v.ID()) - } - return fmt.Sprintf("v%d?", v.ID()) -} - -// RegType represents the type of a register. -type RegType byte - -const ( - RegTypeInvalid RegType = iota - RegTypeInt - RegTypeFloat - NumRegType -) - -// String implements fmt.Stringer. -func (r RegType) String() string { - switch r { - case RegTypeInt: - return "int" - case RegTypeFloat: - return "float" - default: - return "invalid" - } -} - -// RegTypeOf returns the RegType of the given ssa.Type. 
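The getters and setters above pack everything into a single uint64: bits 0-31 hold the VRegID, bits 32-39 the RealReg, and bits 40-47 the RegType. A hypothetical in-package test (illustrative only; the package is internal to wazero, so it cannot be exercised from outside the module) showing the round trip:

package regalloc

import "testing"

func TestVRegPacking(t *testing.T) {
	v := VReg(42).SetRegType(RegTypeInt).SetRealReg(RealReg(3))
	if v.ID() != 42 || v.RealReg() != RealReg(3) || v.RegType() != RegTypeInt || !v.IsRealReg() {
		t.Fatalf("unexpected packing: %064b", uint64(v))
	}
	// Dropping the RealReg bits (32-39) turns it back into a purely virtual register.
	if got := v.SetRealReg(RealRegInvalid); got.IsRealReg() {
		t.Fatalf("expected a virtual register, got %s", got)
	}
}

RegTypeOf, next, is how the backend maps an ssa.Type onto one of these RegType values.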
-func RegTypeOf(p ssa.Type) RegType { - switch p { - case ssa.TypeI32, ssa.TypeI64: - return RegTypeInt - case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - return RegTypeFloat - default: - panic("invalid type") - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go deleted file mode 100644 index a5857f4f2..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go +++ /dev/null @@ -1,1189 +0,0 @@ -// Package regalloc performs register allocation. The algorithm can work on any ISA by implementing the interfaces in -// api.go. -// -// References: -// - https://web.stanford.edu/class/archive/cs/cs143/cs143.1128/lectures/17/Slides17.pdf -// - https://en.wikipedia.org/wiki/Chaitin%27s_algorithm -// - https://llvm.org/ProjectsWithLLVM/2004-Fall-CS426-LS.pdf -// - https://pfalcon.github.io/ssabook/latest/book-full.pdf: Chapter 9. for liveness analysis. -// - https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go -package regalloc - -import ( - "fmt" - "math" - "strings" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -// NewAllocator returns a new Allocator. -func NewAllocator[I Instr, B Block[I], F Function[I, B]](allocatableRegs *RegisterInfo) Allocator[I, B, F] { - a := Allocator[I, B, F]{ - regInfo: allocatableRegs, - phiDefInstListPool: wazevoapi.NewPool[phiDefInstList[I]](resetPhiDefInstList[I]), - blockStates: wazevoapi.NewIDedPool[blockState[I, B, F]](resetBlockState[I, B, F]), - } - a.state.vrStates = wazevoapi.NewIDedPool[vrState[I, B, F]](resetVrState[I, B, F]) - a.state.reset() - for _, regs := range allocatableRegs.AllocatableRegisters { - for _, r := range regs { - a.allocatableSet = a.allocatableSet.add(r) - } - } - return a -} - -type ( - // RegisterInfo holds the statically-known ISA-specific register information. - RegisterInfo struct { - // AllocatableRegisters is a 2D array of allocatable RealReg, indexed by regTypeNum and regNum. - // The order matters: the first element is the most preferred one when allocating. - AllocatableRegisters [NumRegType][]RealReg - CalleeSavedRegisters RegSet - CallerSavedRegisters RegSet - RealRegToVReg []VReg - // RealRegName returns the name of the given RealReg for debugging. - RealRegName func(r RealReg) string - RealRegType func(r RealReg) RegType - } - - // Allocator is a register allocator. - Allocator[I Instr, B Block[I], F Function[I, B]] struct { - // regInfo is static per ABI/ISA, and is initialized by the machine during Machine.PrepareRegisterAllocator. - regInfo *RegisterInfo - // allocatableSet is a set of allocatable RealReg derived from regInfo. Static per ABI/ISA. - allocatableSet RegSet - allocatedCalleeSavedRegs []VReg - vs []VReg - ss []*vrState[I, B, F] - copies []_copy[I, B, F] - phiDefInstListPool wazevoapi.Pool[phiDefInstList[I]] - - // Followings are re-used during various places. - blks []B - reals []RealReg - - // Following two fields are updated while iterating the blocks in the reverse postorder. - state state[I, B, F] - blockStates wazevoapi.IDedPool[blockState[I, B, F]] - } - - // _copy represents a source and destination pair of a copy instruction. - _copy[I Instr, B Block[I], F Function[I, B]] struct { - src *vrState[I, B, F] - dstID VRegID - } - - // programCounter represents an opaque index into the program which is used to represents a LiveInterval of a VReg. 
- programCounter int32 - - state[I Instr, B Block[I], F Function[I, B]] struct { - argRealRegs []VReg - regsInUse regInUseSet[I, B, F] - vrStates wazevoapi.IDedPool[vrState[I, B, F]] - - currentBlockID int32 - - // allocatedRegSet is a set of RealReg that are allocated during the allocation phase. This is reset per function. - allocatedRegSet RegSet - } - - blockState[I Instr, B Block[I], F Function[I, B]] struct { - // liveIns is a list of VReg that are live at the beginning of the block. - liveIns []*vrState[I, B, F] - // seen is true if the block is visited during the liveness analysis. - seen bool - // visited is true if the block is visited during the allocation phase. - visited bool - startFromPredIndex int - // startRegs is a list of RealReg that are used at the beginning of the block. This is used to fix the merge edges. - startRegs regInUseSet[I, B, F] - // endRegs is a list of RealReg that are used at the end of the block. This is used to fix the merge edges. - endRegs regInUseSet[I, B, F] - } - - vrState[I Instr, B Block[I], f Function[I, B]] struct { - v VReg - r RealReg - // defInstr is the instruction that defines this value. If this is the phi value and not the entry block, this is nil. - defInstr I - // defBlk is the block that defines this value. If this is the phi value, this is the block whose arguments contain this value. - defBlk B - // lca = lowest common ancestor. This is the block that is the lowest common ancestor of all the blocks that - // reloads this value. This is used to determine the spill location. Only valid if spilled=true. - lca B - // lastUse is the program counter of the last use of this value. This changes while iterating the block, and - // should not be used across the blocks as it becomes invalid. To check the validity, use lastUseUpdatedAtBlockID. - lastUse programCounter - lastUseUpdatedAtBlockID int32 - // spilled is true if this value is spilled i.e. the value is reload from the stack somewhere in the program. - // - // Note that this field is used during liveness analysis for different purpose. This is used to determine the - // value is live-in or not. - spilled bool - // isPhi is true if this is a phi value. - isPhi bool - desiredLoc desiredLoc - // phiDefInstList is a list of instructions that defines this phi value. - // This is used to determine the spill location, and only valid if isPhi=true. - *phiDefInstList[I] - } - - // phiDefInstList is a linked list of instructions that defines a phi value. - phiDefInstList[I Instr] struct { - instr I - v VReg - next *phiDefInstList[I] - } - - // desiredLoc represents a desired location for a VReg. - desiredLoc uint16 - // desiredLocKind is a kind of desired location for a VReg. - desiredLocKind uint16 -) - -const ( - // desiredLocKindUnspecified is a kind of desired location for a VReg that is not specified. - desiredLocKindUnspecified desiredLocKind = iota - // desiredLocKindStack is a kind of desired location for a VReg that is on the stack, only used for the phi values. - desiredLocKindStack - // desiredLocKindReg is a kind of desired location for a VReg that is in a register. 
- desiredLocKindReg - desiredLocUnspecified = desiredLoc(desiredLocKindUnspecified) - desiredLocStack = desiredLoc(desiredLocKindStack) -) - -func newDesiredLocReg(r RealReg) desiredLoc { - return desiredLoc(desiredLocKindReg) | desiredLoc(r<<2) -} - -func (d desiredLoc) realReg() RealReg { - return RealReg(d >> 2) -} - -func (d desiredLoc) stack() bool { - return d&3 == desiredLoc(desiredLocKindStack) -} - -func resetPhiDefInstList[I Instr](l *phiDefInstList[I]) { - var nilInstr I - l.instr = nilInstr - l.next = nil - l.v = VRegInvalid -} - -func (s *state[I, B, F]) dump(info *RegisterInfo) { //nolint:unused - fmt.Println("\t\tstate:") - fmt.Println("\t\t\targRealRegs:", s.argRealRegs) - fmt.Println("\t\t\tregsInUse", s.regsInUse.format(info)) - fmt.Println("\t\t\tallocatedRegSet:", s.allocatedRegSet.format(info)) - fmt.Println("\t\t\tused:", s.regsInUse.format(info)) - var strs []string - for i := 0; i <= s.vrStates.MaxIDEncountered(); i++ { - vs := s.vrStates.Get(i) - if vs == nil { - continue - } - if vs.r != RealRegInvalid { - strs = append(strs, fmt.Sprintf("(v%d: %s)", vs.v.ID(), info.RealRegName(vs.r))) - } - } - fmt.Println("\t\t\tvrStates:", strings.Join(strs, ", ")) -} - -func (s *state[I, B, F]) reset() { - s.argRealRegs = s.argRealRegs[:0] - s.vrStates.Reset() - s.allocatedRegSet = RegSet(0) - s.regsInUse.reset() - s.currentBlockID = -1 -} - -func resetVrState[I Instr, B Block[I], F Function[I, B]](vs *vrState[I, B, F]) { - vs.v = VRegInvalid - vs.r = RealRegInvalid - var nilInstr I - vs.defInstr = nilInstr - var nilBlk B - vs.defBlk = nilBlk - vs.spilled = false - vs.lastUse = -1 - vs.lastUseUpdatedAtBlockID = -1 - vs.lca = nilBlk - vs.isPhi = false - vs.phiDefInstList = nil - vs.desiredLoc = desiredLocUnspecified -} - -func (s *state[I, B, F]) getOrAllocateVRegState(v VReg) *vrState[I, B, F] { - st := s.vrStates.GetOrAllocate(int(v.ID())) - if st.v == VRegInvalid { - st.v = v - } - return st -} - -func (s *state[I, B, F]) getVRegState(v VRegID) *vrState[I, B, F] { - return s.vrStates.Get(int(v)) -} - -func (s *state[I, B, F]) useRealReg(r RealReg, vr *vrState[I, B, F]) { - s.regsInUse.add(r, vr) - vr.r = r - s.allocatedRegSet = s.allocatedRegSet.add(r) -} - -func (s *state[I, B, F]) releaseRealReg(r RealReg) { - current := s.regsInUse.get(r) - if current != nil { - s.regsInUse.remove(r) - current.r = RealRegInvalid - } -} - -// recordReload records that the given VReg is reloaded in the given block. -// This is used to determine the spill location by tracking the lowest common ancestor of all the blocks that reloads the value. -func (vs *vrState[I, B, F]) recordReload(f F, blk B) { - vs.spilled = true - var nilBlk B - if lca := vs.lca; lca == nilBlk { - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\t\tv%d is reloaded in blk%d,\n", vs.v.ID(), blk.ID()) - } - vs.lca = blk - } else if lca != blk { - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\t\tv%d is reloaded in blk%d, lca=%d\n", vs.v.ID(), blk.ID(), vs.lca.ID()) - } - vs.lca = f.LowestCommonAncestor(lca, blk) - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("updated lca=%d\n", vs.lca.ID()) - } - } -} - -func (a *Allocator[I, B, F]) findOrSpillAllocatable(s *state[I, B, F], allocatable []RealReg, forbiddenMask RegSet, preferred RealReg) (r RealReg) { - r = RealRegInvalid - // First, check if the preferredMask has any allocatable register. 
- if preferred != RealRegInvalid && !forbiddenMask.has(preferred) && !s.regsInUse.has(preferred) { - return preferred - } - - var lastUseAt programCounter - var spillVReg VReg - for _, candidateReal := range allocatable { - if forbiddenMask.has(candidateReal) { - continue - } - - using := s.regsInUse.get(candidateReal) - if using == nil { - // This is not used at this point. - return candidateReal - } - - // Real registers in use should not be spilled, so we skip them. - // For example, if the register is used as an argument register, and it might be - // spilled and not reloaded when it ends up being used as a temporary to pass - // stack based argument. - if using.v.IsRealReg() { - continue - } - - isPreferred := candidateReal == preferred - - // last == -1 means the value won't be used anymore. - if last := using.lastUse; r == RealRegInvalid || isPreferred || last == -1 || (lastUseAt != -1 && last > lastUseAt) { - lastUseAt = last - r = candidateReal - spillVReg = using.v - if isPreferred { - break - } - } - } - - if r == RealRegInvalid { - panic("not found any allocatable register") - } - - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\tspilling v%d when lastUseAt=%d and regsInUse=%s\n", spillVReg.ID(), lastUseAt, s.regsInUse.format(a.regInfo)) - } - s.releaseRealReg(r) - return r -} - -func (s *state[I, B, F]) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) RealReg { - for _, r := range allocatable { - if !s.regsInUse.has(r) && !forbiddenMask.has(r) { - return r - } - } - return RealRegInvalid -} - -func (s *state[I, B, F]) resetAt(bs *blockState[I, B, F]) { - s.regsInUse.range_(func(_ RealReg, vs *vrState[I, B, F]) { - vs.r = RealRegInvalid - }) - s.regsInUse.reset() - bs.endRegs.range_(func(r RealReg, vs *vrState[I, B, F]) { - if vs.lastUseUpdatedAtBlockID == s.currentBlockID && vs.lastUse == programCounterLiveIn { - s.regsInUse.add(r, vs) - vs.r = r - } - }) -} - -func resetBlockState[I Instr, B Block[I], F Function[I, B]](b *blockState[I, B, F]) { - b.seen = false - b.visited = false - b.endRegs.reset() - b.startRegs.reset() - b.startFromPredIndex = -1 - b.liveIns = b.liveIns[:0] -} - -func (b *blockState[I, B, F]) dump(a *RegisterInfo) { - fmt.Println("\t\tblockState:") - fmt.Println("\t\t\tstartRegs:", b.startRegs.format(a)) - fmt.Println("\t\t\tendRegs:", b.endRegs.format(a)) - fmt.Println("\t\t\tstartFromPredIndex:", b.startFromPredIndex) - fmt.Println("\t\t\tvisited:", b.visited) -} - -// DoAllocation performs register allocation on the given Function. -func (a *Allocator[I, B, F]) DoAllocation(f F) { - a.livenessAnalysis(f) - a.alloc(f) - a.determineCalleeSavedRealRegs(f) -} - -func (a *Allocator[I, B, F]) determineCalleeSavedRealRegs(f F) { - a.allocatedCalleeSavedRegs = a.allocatedCalleeSavedRegs[:0] - a.state.allocatedRegSet.Range(func(allocatedRealReg RealReg) { - if a.regInfo.CalleeSavedRegisters.has(allocatedRealReg) { - a.allocatedCalleeSavedRegs = append(a.allocatedCalleeSavedRegs, a.regInfo.RealRegToVReg[allocatedRealReg]) - } - }) - f.ClobberedRegisters(a.allocatedCalleeSavedRegs) -} - -func (a *Allocator[I, B, F]) getOrAllocateBlockState(blockID int32) *blockState[I, B, F] { - return a.blockStates.GetOrAllocate(int(blockID)) -} - -// phiBlk returns the block that defines the given phi value, nil otherwise. 
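DoAllocation above is the allocator's whole public surface: liveness analysis, per-block allocation, then recording of the clobbered callee-saved registers. A hypothetical driver loop in an ISA backend could look like the following sketch, where myInstr, myBlock, myFunction, regInfo and functionsToCompile are stand-ins rather than real wazero identifiers:

// Sketch only: the concrete types must implement the Instr/Block/Function
// interfaces defined in api.go for the target ISA.
alloc := regalloc.NewAllocator[*myInstr, *myBlock, *myFunction](&regInfo)
for _, fn := range functionsToCompile {
	alloc.DoAllocation(fn) // liveness analysis -> per-block allocation -> callee-saved bookkeeping.
	alloc.Reset()          // the allocator is reused across functions; Reset clears per-function state.
}

The helpers that follow, starting with phiBlk, serve the liveness and allocation phases.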
-func (vs *vrState[I, B, F]) phiBlk() B { - if vs.isPhi { - return vs.defBlk - } - var nilBlk B - return nilBlk -} - -const ( - programCounterLiveIn = math.MinInt32 - programCounterLiveOut = math.MaxInt32 -) - -// liveAnalysis constructs Allocator.blockLivenessData. -// The algorithm here is described in https://pfalcon.github.io/ssabook/latest/book-full.pdf Chapter 9.2. -func (a *Allocator[I, B, F]) livenessAnalysis(f F) { - s := &a.state - - for i := VRegID(0); i < vRegIDReservedForRealNum; i++ { - s.getOrAllocateVRegState(VReg(i).SetRealReg(RealReg(i))) - } - - var nilBlk B - var nilInstr I - for blk := f.PostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.PostOrderBlockIteratorNext() { - // We should gather phi value data. - for _, p := range f.BlockParams(blk, &a.vs) { - vs := s.getOrAllocateVRegState(p) - vs.isPhi = true - vs.defBlk = blk - } - - blkID := blk.ID() - info := a.getOrAllocateBlockState(blkID) - - a.ss = a.ss[:0] - const ( - flagDeleted = false - flagLive = true - ) - ns := blk.Succs() - for i := 0; i < ns; i++ { - succ := f.Succ(blk, i) - if succ == nilBlk { - continue - } - - succID := succ.ID() - succInfo := a.getOrAllocateBlockState(succID) - if !succInfo.seen { // This means the back edge. - continue - } - - for _, st := range succInfo.liveIns { - if st.phiBlk() != succ && st.spilled != flagLive { //nolint:gosimple - // We use .spilled field to store the flag. - st.spilled = flagLive - a.ss = append(a.ss, st) - } - } - } - - for instr := blk.InstrRevIteratorBegin(); instr != nilInstr; instr = blk.InstrRevIteratorNext() { - - var use, def VReg - var defIsPhi bool - for _, def = range instr.Defs(&a.vs) { - if !def.IsRealReg() { - st := s.getOrAllocateVRegState(def) - defIsPhi = st.isPhi - // Note: We use .spilled field to store the flag. - st.spilled = flagDeleted - } - } - for _, use = range instr.Uses(&a.vs) { - if !use.IsRealReg() { - st := s.getOrAllocateVRegState(use) - // Note: We use .spilled field to store the flag. - if st.spilled != flagLive { //nolint:gosimple - st.spilled = flagLive - a.ss = append(a.ss, st) - } - } - } - - if defIsPhi { - if use.Valid() && use.IsRealReg() { - // If the destination is a phi value, and the source is a real register, this is the beginning of the function. - a.state.argRealRegs = append(a.state.argRealRegs, use) - } - } - } - - for _, st := range a.ss { - // We use .spilled field to store the flag. - if st.spilled == flagLive { //nolint:gosimple - info.liveIns = append(info.liveIns, st) - st.spilled = false - } - } - - info.seen = true - } - - nrs := f.LoopNestingForestRoots() - for i := 0; i < nrs; i++ { - root := f.LoopNestingForestRoot(i) - a.loopTreeDFS(f, root) - } -} - -// loopTreeDFS implements the Algorithm 9.3 in the book in an iterative way. -func (a *Allocator[I, B, F]) loopTreeDFS(f F, entry B) { - a.blks = a.blks[:0] - a.blks = append(a.blks, entry) - - for len(a.blks) > 0 { - tail := len(a.blks) - 1 - loop := a.blks[tail] - a.blks = a.blks[:tail] - a.ss = a.ss[:0] - const ( - flagDone = false - flagPending = true - ) - info := a.getOrAllocateBlockState(loop.ID()) - for _, st := range info.liveIns { - if st.phiBlk() != loop { - a.ss = append(a.ss, st) - // We use .spilled field to store the flag. 
- st.spilled = flagPending - } - } - - var siblingAddedView []*vrState[I, B, F] - cn := loop.LoopNestingForestChildren() - for i := 0; i < cn; i++ { - child := f.LoopNestingForestChild(loop, i) - childID := child.ID() - childInfo := a.getOrAllocateBlockState(childID) - - if i == 0 { - begin := len(childInfo.liveIns) - for _, st := range a.ss { - // We use .spilled field to store the flag. - if st.spilled == flagPending { //nolint:gosimple - st.spilled = flagDone - // TODO: deduplicate, though I don't think it has much impact. - childInfo.liveIns = append(childInfo.liveIns, st) - } - } - siblingAddedView = childInfo.liveIns[begin:] - } else { - // TODO: deduplicate, though I don't think it has much impact. - childInfo.liveIns = append(childInfo.liveIns, siblingAddedView...) - } - - if child.LoopHeader() { - a.blks = append(a.blks, child) - } - } - - if cn == 0 { - // If there's no forest child, we haven't cleared the .spilled field at this point. - for _, st := range a.ss { - st.spilled = false - } - } - } -} - -// alloc allocates registers for the given function by iterating the blocks in the reverse postorder. -// The algorithm here is derived from the Go compiler's allocator https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go -// In short, this is a simply linear scan register allocation where each block inherits the register allocation state from -// one of its predecessors. Each block inherits the selected state and starts allocation from there. -// If there's a discrepancy in the end states between predecessors, the adjustments are made to ensure consistency after allocation is done (which we call "fixing merge state"). -// The spill instructions (store into the dedicated slots) are inserted after all the allocations and fixing merge states. That is because -// at the point, we all know where the reloads happen, and therefore we can know the best place to spill the values. More precisely, -// the spill happens in the block that is the lowest common ancestor of all the blocks that reloads the value. -// -// All of these logics are almost the same as Go's compiler which has a dedicated description in the source file ^^. -func (a *Allocator[I, B, F]) alloc(f F) { - // First we allocate each block in the reverse postorder (at least one predecessor should be allocated for each block). - var nilBlk B - for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.ReversePostOrderBlockIteratorNext() { - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("========== allocating blk%d ========\n", blk.ID()) - } - if blk.Entry() { - a.finalizeStartReg(f, blk) - } - a.allocBlock(f, blk) - } - // After the allocation, we all know the start and end state of each block. So we can fix the merge states. - for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.ReversePostOrderBlockIteratorNext() { - a.fixMergeState(f, blk) - } - // Finally, we insert the spill instructions as we know all the places where the reloads happen. 
- a.scheduleSpills(f) -} - -func (a *Allocator[I, B, F]) updateLiveInVRState(liveness *blockState[I, B, F]) { - currentBlockID := a.state.currentBlockID - for _, vs := range liveness.liveIns { - vs.lastUse = programCounterLiveIn - vs.lastUseUpdatedAtBlockID = currentBlockID - } -} - -func (a *Allocator[I, B, F]) finalizeStartReg(f F, blk B) { - bID := blk.ID() - s := &a.state - currentBlkState := a.getOrAllocateBlockState(bID) - if currentBlkState.startFromPredIndex > -1 { - return - } - - s.currentBlockID = bID - a.updateLiveInVRState(currentBlkState) - - preds := blk.Preds() - var predState *blockState[I, B, F] - switch preds { - case 0: // This is the entry block. - case 1: - predID := f.Pred(blk, 0).ID() - predState = a.getOrAllocateBlockState(predID) - currentBlkState.startFromPredIndex = 0 - default: - // TODO: there should be some better heuristic to choose the predecessor. - for i := 0; i < preds; i++ { - predID := f.Pred(blk, i).ID() - if _predState := a.getOrAllocateBlockState(predID); _predState.visited { - predState = _predState - currentBlkState.startFromPredIndex = i - break - } - } - } - if predState == nil { - if !blk.Entry() { - panic(fmt.Sprintf("BUG: at lease one predecessor should be visited for blk%d", blk.ID())) - } - for _, u := range s.argRealRegs { - s.useRealReg(u.RealReg(), s.getVRegState(u.ID())) - } - currentBlkState.startFromPredIndex = 0 - } else { - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("allocating blk%d starting from blk%d (on index=%d) \n", - bID, f.Pred(blk, currentBlkState.startFromPredIndex).ID(), currentBlkState.startFromPredIndex) - } - s.resetAt(predState) - } - - s.regsInUse.range_(func(allocated RealReg, v *vrState[I, B, F]) { - currentBlkState.startRegs.add(allocated, v) - }) - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("finalized start reg for blk%d: %s\n", blk.ID(), currentBlkState.startRegs.format(a.regInfo)) - } -} - -func (a *Allocator[I, B, F]) allocBlock(f F, blk B) { - bID := blk.ID() - s := &a.state - currentBlkState := a.getOrAllocateBlockState(bID) - s.currentBlockID = bID - - if currentBlkState.startFromPredIndex < 0 { - panic("BUG: startFromPredIndex should be set in finalizeStartReg prior to allocBlock") - } - - // Clears the previous state. - s.regsInUse.range_(func(allocatedRealReg RealReg, vr *vrState[I, B, F]) { vr.r = RealRegInvalid }) - s.regsInUse.reset() - // Then set the start state. - currentBlkState.startRegs.range_(func(allocatedRealReg RealReg, vr *vrState[I, B, F]) { s.useRealReg(allocatedRealReg, vr) }) - - desiredUpdated := a.ss[:0] - - // Update the last use of each VReg. - a.copies = a.copies[:0] // Stores the copy instructions. - var pc programCounter - var nilInstr I - for instr := blk.InstrIteratorBegin(); instr != nilInstr; instr = blk.InstrIteratorNext() { - var useState *vrState[I, B, F] - for _, use := range instr.Uses(&a.vs) { - useState = s.getVRegState(use.ID()) - if !use.IsRealReg() { - useState.lastUse = pc - } - } - - if instr.IsCopy() { - def := instr.Defs(&a.vs)[0] - a.copies = append(a.copies, _copy[I, B, F]{src: useState, dstID: def.ID()}) - r := def.RealReg() - if r != RealRegInvalid { - if !useState.isPhi { // TODO: no idea why do we need this. - useState.desiredLoc = newDesiredLocReg(r) - desiredUpdated = append(desiredUpdated, useState) - } - } - } - pc++ - } - - // Mark all live-out values by checking live-in of the successors. - // While doing so, we also update the desired register values. 
- var succ B - var nilBlk B - for i, ns := 0, blk.Succs(); i < ns; i++ { - succ = f.Succ(blk, i) - if succ == nilBlk { - continue - } - - succID := succ.ID() - succState := a.getOrAllocateBlockState(succID) - for _, st := range succState.liveIns { - if st.phiBlk() != succ { - st.lastUse = programCounterLiveOut - } - } - - if succState.startFromPredIndex > -1 { - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("blk%d -> blk%d: start_regs: %s\n", bID, succID, succState.startRegs.format(a.regInfo)) - } - succState.startRegs.range_(func(allocatedRealReg RealReg, vs *vrState[I, B, F]) { - vs.desiredLoc = newDesiredLocReg(allocatedRealReg) - desiredUpdated = append(desiredUpdated, vs) - }) - for _, p := range f.BlockParams(succ, &a.vs) { - vs := s.getVRegState(p.ID()) - if vs.desiredLoc.realReg() == RealRegInvalid { - vs.desiredLoc = desiredLocStack - desiredUpdated = append(desiredUpdated, vs) - } - } - } - } - - // Propagate the desired register values from the end of the block to the beginning. - for _, instr := range a.copies { - defState := s.getVRegState(instr.dstID) - desired := defState.desiredLoc.realReg() - useState := instr.src - if useState.phiBlk() != succ && useState.desiredLoc == desiredLocUnspecified { - useState.desiredLoc = newDesiredLocReg(desired) - desiredUpdated = append(desiredUpdated, useState) - } - } - - pc = 0 - for instr := blk.InstrIteratorBegin(); instr != nilInstr; instr = blk.InstrIteratorNext() { - if wazevoapi.RegAllocLoggingEnabled { - fmt.Println(instr) - } - - var currentUsedSet RegSet - killSet := a.reals[:0] - - // Gather the set of registers that will be used in the current instruction. - uses := instr.Uses(&a.vs) - for _, use := range uses { - if use.IsRealReg() { - r := use.RealReg() - currentUsedSet = currentUsedSet.add(r) - if a.allocatableSet.has(r) { - killSet = append(killSet, r) - } - } else { - vs := s.getVRegState(use.ID()) - if r := vs.r; r != RealRegInvalid { - currentUsedSet = currentUsedSet.add(r) - } - } - } - - for i, use := range uses { - if !use.IsRealReg() { - vs := s.getVRegState(use.ID()) - killed := vs.lastUse == pc - r := vs.r - - if r == RealRegInvalid { - r = a.findOrSpillAllocatable(s, a.regInfo.AllocatableRegisters[use.RegType()], currentUsedSet, - // Prefer the desired register if it's available. - vs.desiredLoc.realReg()) - vs.recordReload(f, blk) - f.ReloadRegisterBefore(use.SetRealReg(r), instr) - s.useRealReg(r, vs) - } - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\ttrying to use v%v on %s\n", use.ID(), a.regInfo.RealRegName(r)) - } - instr.AssignUse(i, use.SetRealReg(r)) - currentUsedSet = currentUsedSet.add(r) - if killed { - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\tkill v%d with %s\n", use.ID(), a.regInfo.RealRegName(r)) - } - killSet = append(killSet, r) - } - } - } - - isIndirect := instr.IsIndirectCall() - if instr.IsCall() || isIndirect { - addr := RealRegInvalid - if isIndirect { - addr = a.vs[0].RealReg() - } - a.releaseCallerSavedRegs(addr) - } - - for _, r := range killSet { - s.releaseRealReg(r) - } - a.reals = killSet - - defs := instr.Defs(&a.vs) - switch len(defs) { - default: - // Some instructions define multiple values on real registers. - // E.g. call instructions (following calling convention) / div instruction on x64 that defines both rax and rdx. - // - // Note that currently I assume that such instructions define only the pre colored real registers, not the VRegs - // that require allocations. 
If we need to support such case, we need to add the logic to handle it here, - // though is there any such instruction? - for _, def := range defs { - if !def.IsRealReg() { - panic("BUG: multiple defs should be on real registers") - } - r := def.RealReg() - if s.regsInUse.has(r) { - s.releaseRealReg(r) - } - s.useRealReg(r, s.getVRegState(def.ID())) - } - case 0: - case 1: - def := defs[0] - vState := s.getVRegState(def.ID()) - if def.IsRealReg() { - r := def.RealReg() - if a.allocatableSet.has(r) { - if s.regsInUse.has(r) { - s.releaseRealReg(r) - } - s.useRealReg(r, vState) - } - } else { - r := vState.r - - if desired := vState.desiredLoc.realReg(); desired != RealRegInvalid { - if r != desired { - if (vState.isPhi && vState.defBlk == succ) || - // If this is not a phi and it's already assigned a real reg, - // this value has multiple definitions, hence we cannot assign the desired register. - (!s.regsInUse.has(desired) && r == RealRegInvalid) { - // If the phi value is passed via a real register, we force the value to be in the desired register. - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\t\tv%d is phi and desiredReg=%s\n", def.ID(), a.regInfo.RealRegName(desired)) - } - if r != RealRegInvalid { - // If the value is already in a different real register, we release it to change the state. - // Otherwise, multiple registers might have the same values at the end, which results in - // messing up the merge state reconciliation. - s.releaseRealReg(r) - } - r = desired - s.releaseRealReg(r) - s.useRealReg(r, vState) - } - } - } - - // Allocate a new real register if `def` is not currently assigned one. - // It can happen when multiple instructions define the same VReg (e.g. const loads). - if r == RealRegInvalid { - if instr.IsCopy() { - copySrc := instr.Uses(&a.vs)[0].RealReg() - if a.allocatableSet.has(copySrc) && !s.regsInUse.has(copySrc) { - r = copySrc - } - } - if r == RealRegInvalid { - typ := def.RegType() - r = a.findOrSpillAllocatable(s, a.regInfo.AllocatableRegisters[typ], RegSet(0), RealRegInvalid) - } - s.useRealReg(r, vState) - } - dr := def.SetRealReg(r) - instr.AssignDef(dr) - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\tdefining v%d with %s\n", def.ID(), a.regInfo.RealRegName(r)) - } - if vState.isPhi { - if vState.desiredLoc.stack() { // Stack based phi value. - f.StoreRegisterAfter(dr, instr) - // Release the real register as it's not used anymore. - s.releaseRealReg(r) - } else { - // Only the register based phis are necessary to track the defining instructions - // since the stack-based phis are already having stores inserted ^. - n := a.phiDefInstListPool.Allocate() - n.instr = instr - n.next = vState.phiDefInstList - n.v = dr - vState.phiDefInstList = n - } - } else { - vState.defInstr = instr - vState.defBlk = blk - } - } - } - if wazevoapi.RegAllocLoggingEnabled { - fmt.Println(instr) - } - pc++ - } - - s.regsInUse.range_(func(allocated RealReg, v *vrState[I, B, F]) { currentBlkState.endRegs.add(allocated, v) }) - - currentBlkState.visited = true - if wazevoapi.RegAllocLoggingEnabled { - currentBlkState.dump(a.regInfo) - } - - // Reset the desired end location. - for _, vs := range desiredUpdated { - vs.desiredLoc = desiredLocUnspecified - } - a.ss = desiredUpdated[:0] - - for i := 0; i < blk.Succs(); i++ { - succ := f.Succ(blk, i) - if succ == nilBlk { - continue - } - // If the successor is not visited yet, finalize the start state. 
- a.finalizeStartReg(f, succ) - } -} - -func (a *Allocator[I, B, F]) releaseCallerSavedRegs(addrReg RealReg) { - s := &a.state - - for allocated := RealReg(0); allocated < 64; allocated++ { - if allocated == addrReg { // If this is the call indirect, we should not touch the addr register. - continue - } - if vs := s.regsInUse.get(allocated); vs != nil { - if vs.v.IsRealReg() { - continue // This is the argument register as it's already used by VReg backed by the corresponding RealReg. - } - if !a.regInfo.CallerSavedRegisters.has(allocated) { - // If this is not a caller-saved register, it is safe to keep it across the call. - continue - } - s.releaseRealReg(allocated) - } - } -} - -func (a *Allocator[I, B, F]) fixMergeState(f F, blk B) { - preds := blk.Preds() - if preds <= 1 { - return - } - - s := &a.state - - // Restores the state at the beginning of the block. - bID := blk.ID() - blkSt := a.getOrAllocateBlockState(bID) - desiredOccupants := &blkSt.startRegs - var desiredOccupantsSet RegSet - for i, v := range desiredOccupants { - if v != nil { - desiredOccupantsSet = desiredOccupantsSet.add(RealReg(i)) - } - } - - if wazevoapi.RegAllocLoggingEnabled { - fmt.Println("fixMergeState", blk.ID(), ":", desiredOccupants.format(a.regInfo)) - } - - s.currentBlockID = bID - a.updateLiveInVRState(blkSt) - - for i := 0; i < preds; i++ { - if i == blkSt.startFromPredIndex { - continue - } - - pred := f.Pred(blk, i) - predSt := a.getOrAllocateBlockState(pred.ID()) - - s.resetAt(predSt) - - // Finds the free registers if any. - intTmp, floatTmp := VRegInvalid, VRegInvalid - if intFree := s.findAllocatable( - a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupantsSet, - ); intFree != RealRegInvalid { - intTmp = FromRealReg(intFree, RegTypeInt) - } - if floatFree := s.findAllocatable( - a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupantsSet, - ); floatFree != RealRegInvalid { - floatTmp = FromRealReg(floatFree, RegTypeFloat) - } - - for r := RealReg(0); r < 64; r++ { - desiredVReg := desiredOccupants.get(r) - if desiredVReg == nil { - continue - } - - currentVReg := s.regsInUse.get(r) - if currentVReg != nil && desiredVReg.v.ID() == currentVReg.v.ID() { - continue - } - - typ := desiredVReg.v.RegType() - var tmpRealReg VReg - if typ == RegTypeInt { - tmpRealReg = intTmp - } else { - tmpRealReg = floatTmp - } - a.reconcileEdge(f, r, pred, currentVReg, desiredVReg, tmpRealReg, typ) - } - } -} - -// reconcileEdge reconciles the register state between the current block and the predecessor for the real register `r`. -// -// - currentVReg is the current VReg value that sits on the register `r`. This can be VRegInvalid if the register is not used at the end of the predecessor. -// - desiredVReg is the desired VReg value that should be on the register `r`. -// - freeReg is the temporary register that can be used to swap the values, which may or may not be used. -// - typ is the register type of the `r`. -func (a *Allocator[I, B, F]) reconcileEdge(f F, - r RealReg, - pred B, - currentState, desiredState *vrState[I, B, F], - freeReg VReg, - typ RegType, -) { - desiredVReg := desiredState.v - currentVReg := VRegInvalid - if currentState != nil { - currentVReg = currentState.v - } - // There are four cases to consider: - // 1. currentVReg is valid, but desiredVReg is on the stack. - // 2. Both currentVReg and desiredVReg are valid. - // 3. Desired is on a different register than `r` and currentReg is not valid. - // 4. Desired is on the stack and currentReg is not valid. 
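	// As a rough sketch in terms of the Function hooks invoked below (with `cur`/`want`
	// standing for the current/desired states and `last` for pred.LastInstrForInsertion()):
	//   1. f.StoreRegisterBefore(cur, last) then f.ReloadRegisterBefore(want, last)
	//   2. f.SwapBefore(cur, want, freeReg, last)
	//   3. f.InsertMoveBefore(FromRealReg(r, typ), want, last)
	//   4. f.ReloadRegisterBefore(want, last)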
- - s := &a.state - if currentVReg.Valid() { - er := desiredState.r - if er == RealRegInvalid { - // Case 1: currentVReg is valid, but desiredVReg is on the stack. - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n", - desiredVReg.ID(), a.regInfo.RealRegName(r), - ) - } - // We need to move the current value to the stack, and reload the desired value into the register. - // TODO: we can do better here. - f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion()) - s.releaseRealReg(r) - - desiredState.recordReload(f, pred) - f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion()) - s.useRealReg(r, desiredState) - return - } else { - // Case 2: Both currentVReg and desiredVReg are valid. - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n", - desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er), - ) - } - // This case, we need to swap the values between the current and desired values. - f.SwapBefore( - currentVReg.SetRealReg(r), - desiredVReg.SetRealReg(er), - freeReg, - pred.LastInstrForInsertion(), - ) - s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg()) - s.releaseRealReg(r) - s.releaseRealReg(er) - s.useRealReg(r, desiredState) - s.useRealReg(er, currentState) - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er)) - } - } - } else { - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\t\tv%d is desired to be on %s, current not used\n", - desiredVReg.ID(), a.regInfo.RealRegName(r), - ) - } - if currentReg := desiredState.r; currentReg != RealRegInvalid { - // Case 3: Desired is on a different register than `r` and currentReg is not valid. - // We simply need to move the desired value to the register. - f.InsertMoveBefore( - FromRealReg(r, typ), - desiredVReg.SetRealReg(currentReg), - pred.LastInstrForInsertion(), - ) - s.releaseRealReg(currentReg) - } else { - // Case 4: Both currentVReg and desiredVReg are not valid. - // We simply need to reload the desired value into the register. - desiredState.recordReload(f, pred) - f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion()) - } - s.useRealReg(r, desiredState) - } -} - -func (a *Allocator[I, B, F]) scheduleSpills(f F) { - states := a.state.vrStates - for i := 0; i <= states.MaxIDEncountered(); i++ { - vs := states.Get(i) - if vs == nil { - continue - } - if vs.spilled { - a.scheduleSpill(f, vs) - } - } -} - -func (a *Allocator[I, B, F]) scheduleSpill(f F, vs *vrState[I, B, F]) { - v := vs.v - // If the value is the phi value, we need to insert a spill after each phi definition. - if vs.isPhi { - for defInstr := vs.phiDefInstList; defInstr != nil; defInstr = defInstr.next { - f.StoreRegisterAfter(defInstr.v, defInstr.instr) - } - return - } - - pos := vs.lca - definingBlk := vs.defBlk - r := RealRegInvalid - var nilBlk B - if definingBlk == nilBlk { - panic(fmt.Sprintf("BUG: definingBlk should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String())) - } - if pos == nilBlk { - panic(fmt.Sprintf("BUG: pos should not be nil for %s. 
This is likley a bug in backend lowering logic", vs.v.String())) - } - - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("v%d is spilled in blk%d, lca=blk%d\n", v.ID(), definingBlk.ID(), pos.ID()) - } - for pos != definingBlk { - st := a.getOrAllocateBlockState(pos.ID()) - for rr := RealReg(0); rr < 64; rr++ { - if vs := st.startRegs.get(rr); vs != nil && vs.v == v { - r = rr - // Already in the register, so we can place the spill at the beginning of the block. - break - } - } - - if r != RealRegInvalid { - break - } - - pos = f.Idom(pos) - } - - if pos == definingBlk { - defInstr := vs.defInstr - defInstr.Defs(&a.vs) - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("schedule spill v%d after %v\n", v.ID(), defInstr) - } - f.StoreRegisterAfter(a.vs[0], defInstr) - } else { - // Found an ancestor block that holds the value in the register at the beginning of the block. - // We need to insert a spill before the last use. - first := pos.FirstInstr() - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("schedule spill v%d before %v\n", v.ID(), first) - } - f.StoreRegisterAfter(v.SetRealReg(r), first) - } -} - -// Reset resets the allocator's internal state so that it can be reused. -func (a *Allocator[I, B, F]) Reset() { - a.state.reset() - a.blockStates.Reset() - a.phiDefInstListPool.Reset() - a.vs = a.vs[:0] -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go deleted file mode 100644 index ce84c9c0c..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go +++ /dev/null @@ -1,96 +0,0 @@ -package regalloc - -import ( - "fmt" - "strings" -) - -// NewRegSet returns a new RegSet with the given registers. -func NewRegSet(regs ...RealReg) RegSet { - var ret RegSet - for _, r := range regs { - ret = ret.add(r) - } - return ret -} - -// RegSet represents a set of registers. 
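The set operations defined just below treat RegSet as a plain 64-bit bitmask, one bit per RealReg. A minimal in-package sketch (has, add and Range are unexported, so this only runs inside the regalloc package; the register numbers are arbitrary):

rs := NewRegSet(RealReg(1), RealReg(5)) // bits 1 and 5 of the uint64 are set.
_ = rs.has(RealReg(5))                  // true
_ = rs.has(RealReg(2))                  // false
rs = rs.add(RealReg(70))                // no-op: only RealRegs below 64 fit in the mask.
var visited []RealReg
rs.Range(func(r RealReg) { visited = append(visited, r) }) // visits r1, then r5, in ascending bit order.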
-type RegSet uint64 - -func (rs RegSet) format(info *RegisterInfo) string { //nolint:unused - var ret []string - for i := 0; i < 64; i++ { - if rs&(1<<uint(i)) != 0 { - ret = append(ret, info.RealRegName(RealReg(i))) - } - } - return strings.Join(ret, ", ") -} - -func (rs RegSet) has(r RealReg) bool { - return rs&(1<<uint(r)) != 0 -} - -func (rs RegSet) add(r RealReg) RegSet { - if r >= 64 { - return rs - } - return rs | 1<<uint(r) -} - -func (rs RegSet) Range(f func(allocatedRealReg RealReg)) { - for i := 0; i < 64; i++ { - if rs&(1<<uint(i)) != 0 { - f(RealReg(i)) - } - } -} - -type regInUseSet[I Instr, B Block[I], F Function[I, B]] [64]*vrState[I, B, F] - -func newRegInUseSet[I Instr, B Block[I], F Function[I, B]]() regInUseSet[I, B, F] { - var ret regInUseSet[I, B, F] - ret.reset() - return ret -} - -func (rs *regInUseSet[I, B, F]) reset() { - clear(rs[:]) -} - -func (rs *regInUseSet[I, B, F]) format(info *RegisterInfo) string { //nolint:unused - var ret []string - for i, vr := range rs { - if vr != nil { - ret = append(ret, fmt.Sprintf("(%s->v%d)", info.RealRegName(RealReg(i)), vr.v.ID())) - } - } - return strings.Join(ret, ", ") -} - -func (rs *regInUseSet[I, B, F]) has(r RealReg) bool { - return r < 64 && rs[r] != nil -} - -func (rs *regInUseSet[I, B, F]) get(r RealReg) *vrState[I, B, F] { - return rs[r] -} - -func (rs *regInUseSet[I, B, F]) remove(r RealReg) { - rs[r] = nil -} - -func (rs *regInUseSet[I, B, F]) add(r RealReg, vr *vrState[I, B, F]) { - if r >= 64 { - return - } - rs[r] = vr -} - -func (rs *regInUseSet[I, B, F]) range_(f func(allocatedRealReg RealReg, vr *vrState[I, B, F])) { - for i, vr := range rs { - if vr != nil { - f(RealReg(i), vr) - } - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/vdef.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/vdef.go deleted file mode 100644 index 47a275a3a..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/vdef.go +++ /dev/null @@ -1,19 +0,0 @@ -package backend - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -// SSAValueDefinition represents a definition of an SSA value. -type SSAValueDefinition struct { - V ssa.Value - // Instr is not nil if this is a definition from an instruction. - Instr *ssa.Instruction - // RefCount is the number of references to the result. - RefCount uint32 -} - -// IsFromInstr returns true if this definition is from an instruction. -func (d *SSAValueDefinition) IsFromInstr() bool { - return d.Instr != nil -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go deleted file mode 100644 index 639429a63..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go +++ /dev/null @@ -1,708 +0,0 @@ -package wazevo - -import ( - "context" - "fmt" - "reflect" - "runtime" - "sync/atomic" - "unsafe" - - "github.com/tetratelabs/wazero/api" - "github.com/tetratelabs/wazero/experimental" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" - "github.com/tetratelabs/wazero/internal/expctxkeys" - "github.com/tetratelabs/wazero/internal/internalapi" - "github.com/tetratelabs/wazero/internal/wasm" - "github.com/tetratelabs/wazero/internal/wasmdebug" - "github.com/tetratelabs/wazero/internal/wasmruntime" -) - -type ( - // callEngine implements api.Function. 
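callEngine is the concrete type behind the public api.Function, so the code below is ultimately what runs when a caller does something like the following sketch (public wazero API, assuming the usual imports of context, log, fmt and the wazero package; wasmBytes and the exported name "add" are placeholders):

ctx := context.Background()
r := wazero.NewRuntime(ctx)
defer r.Close(ctx)

mod, err := r.Instantiate(ctx, wasmBytes) // wasmBytes: a compiled Wasm module (placeholder).
if err != nil {
	log.Fatal(err)
}
// ExportedFunction returns an api.Function, backed here by the callEngine defined below.
add := mod.ExportedFunction("add")
results, err := add.Call(ctx, 1, 2) // params and results travel as []uint64.
if err != nil {
	log.Fatal(err)
}
fmt.Println(results[0])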
- callEngine struct { - internalapi.WazeroOnly - stack []byte - // stackTop is the pointer to the *aligned* top of the stack. This must be updated - // whenever the stack is changed. This is passed to the assembly function - // at the very beginning of api.Function Call/CallWithStack. - stackTop uintptr - // executable is the pointer to the executable code for this function. - executable *byte - preambleExecutable *byte - // parent is the *moduleEngine from which this callEngine is created. - parent *moduleEngine - // indexInModule is the index of the function in the module. - indexInModule wasm.Index - // sizeOfParamResultSlice is the size of the parameter/result slice. - sizeOfParamResultSlice int - requiredParams int - // execCtx holds various information to be read/written by assembly functions. - execCtx executionContext - // execCtxPtr holds the pointer to the executionContext which doesn't change after callEngine is created. - execCtxPtr uintptr - numberOfResults int - stackIteratorImpl stackIterator - } - - // executionContext is the struct to be read/written by assembly functions. - executionContext struct { - // exitCode holds the wazevoapi.ExitCode describing the state of the function execution. - exitCode wazevoapi.ExitCode - // callerModuleContextPtr holds the moduleContextOpaque for Go function calls. - callerModuleContextPtr *byte - // originalFramePointer holds the original frame pointer of the caller of the assembly function. - originalFramePointer uintptr - // originalStackPointer holds the original stack pointer of the caller of the assembly function. - originalStackPointer uintptr - // goReturnAddress holds the return address to go back to the caller of the assembly function. - goReturnAddress uintptr - // stackBottomPtr holds the pointer to the bottom of the stack. - stackBottomPtr *byte - // goCallReturnAddress holds the return address to go back to the caller of the Go function. - goCallReturnAddress *byte - // stackPointerBeforeGoCall holds the stack pointer before calling a Go function. - stackPointerBeforeGoCall *uint64 - // stackGrowRequiredSize holds the required size of stack grow. - stackGrowRequiredSize uintptr - // memoryGrowTrampolineAddress holds the address of memory grow trampoline function. - memoryGrowTrampolineAddress *byte - // stackGrowCallTrampolineAddress holds the address of stack grow trampoline function. - stackGrowCallTrampolineAddress *byte - // checkModuleExitCodeTrampolineAddress holds the address of check-module-exit-code function. - checkModuleExitCodeTrampolineAddress *byte - // savedRegisters is the opaque spaces for save/restore registers. - // We want to align 16 bytes for each register, so we use [64][2]uint64. - savedRegisters [64][2]uint64 - // goFunctionCallCalleeModuleContextOpaque is the pointer to the target Go function's moduleContextOpaque. - goFunctionCallCalleeModuleContextOpaque uintptr - // tableGrowTrampolineAddress holds the address of table grow trampoline function. - tableGrowTrampolineAddress *byte - // refFuncTrampolineAddress holds the address of ref-func trampoline function. - refFuncTrampolineAddress *byte - // memmoveAddress holds the address of memmove function implemented by Go runtime. See memmove.go. - memmoveAddress uintptr - // framePointerBeforeGoCall holds the frame pointer before calling a Go function. Note: only used in amd64. - framePointerBeforeGoCall uintptr - // memoryWait32TrampolineAddress holds the address of memory_wait32 trampoline function. 
-		memoryWait32TrampolineAddress *byte
-		// memoryWait64TrampolineAddress holds the address of memory_wait64 trampoline function.
-		memoryWait64TrampolineAddress *byte
-		// memoryNotifyTrampolineAddress holds the address of the memory_notify trampoline function.
-		memoryNotifyTrampolineAddress *byte
-	}
-)
-
-func (c *callEngine) requiredInitialStackSize() int {
-	const initialStackSizeDefault = 10240
-	stackSize := initialStackSizeDefault
-	paramResultInBytes := c.sizeOfParamResultSlice * 8 * 2 // * 8 because uint64 is 8 bytes, and *2 because we need both separated param/result slots.
-	required := paramResultInBytes + 32 + 16 // 32 is enough to accommodate the call frame info, and 16 exists just in case when []byte is not aligned to 16 bytes.
-	if required > stackSize {
-		stackSize = required
-	}
-	return stackSize
-}
-
-func (c *callEngine) init() {
-	stackSize := c.requiredInitialStackSize()
-	if wazevoapi.StackGuardCheckEnabled {
-		stackSize += wazevoapi.StackGuardCheckGuardPageSize
-	}
-	c.stack = make([]byte, stackSize)
-	c.stackTop = alignedStackTop(c.stack)
-	if wazevoapi.StackGuardCheckEnabled {
-		c.execCtx.stackBottomPtr = &c.stack[wazevoapi.StackGuardCheckGuardPageSize]
-	} else {
-		c.execCtx.stackBottomPtr = &c.stack[0]
-	}
-	c.execCtxPtr = uintptr(unsafe.Pointer(&c.execCtx))
-}
-
-// alignedStackTop returns the 16-byte-aligned top of the given stack.
-// 16 bytes should be good for all platforms (arm64/amd64).
-func alignedStackTop(s []byte) uintptr {
-	stackAddr := uintptr(unsafe.Pointer(&s[len(s)-1]))
-	return stackAddr - (stackAddr & (16 - 1))
-}
-
-// Definition implements api.Function.
-func (c *callEngine) Definition() api.FunctionDefinition {
-	return c.parent.module.Source.FunctionDefinition(c.indexInModule)
-}
-
-// Call implements api.Function.
-func (c *callEngine) Call(ctx context.Context, params ...uint64) ([]uint64, error) {
-	if c.requiredParams != len(params) {
-		return nil, fmt.Errorf("expected %d params, but passed %d", c.requiredParams, len(params))
-	}
-	paramResultSlice := make([]uint64, c.sizeOfParamResultSlice)
-	copy(paramResultSlice, params)
-	if err := c.callWithStack(ctx, paramResultSlice); err != nil {
-		return nil, err
-	}
-	return paramResultSlice[:c.numberOfResults], nil
-}
-
-func (c *callEngine) addFrame(builder wasmdebug.ErrorBuilder, addr uintptr) (def api.FunctionDefinition, listener experimental.FunctionListener) {
-	eng := c.parent.parent.parent
-	cm := eng.compiledModuleOfAddr(addr)
-	if cm == nil {
-		// In this case, the module might have been closed and deleted from the engine.
-		// We fall back to searching the imported modules that can be referenced from this callEngine.
-
-		// First, check this module's own executable.
-		if checkAddrInBytes(addr, c.parent.parent.executable) {
-			cm = c.parent.parent
-		} else {
-			// Otherwise, search all imported modules. TODO: maybe recursive, but not sure it's useful in practice.
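For reference, the masking arithmetic used by alignedStackTop can be shown with plain integers. The sketch below is a standalone illustration with made-up addresses, not code from this file:

package main

import "fmt"

// alignDown16 clears the low four bits of addr, rounding it down to the
// nearest multiple of 16 — the same arithmetic alignedStackTop applies to
// the address of the last byte of the Go-allocated stack slice.
func alignDown16(addr uint64) uint64 {
	return addr - (addr & (16 - 1))
}

func main() {
	for _, a := range []uint64{0x1000, 0x1001, 0x100f, 0x1010} {
		fmt.Printf("%#x -> %#x\n", a, alignDown16(a))
	}
}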
- p := c.parent - for i := range p.importedFunctions { - candidate := p.importedFunctions[i].me.parent - if checkAddrInBytes(addr, candidate.executable) { - cm = candidate - break - } - } - } - } - - if cm != nil { - index := cm.functionIndexOf(addr) - def = cm.module.FunctionDefinition(cm.module.ImportFunctionCount + index) - var sources []string - if dw := cm.module.DWARFLines; dw != nil { - sourceOffset := cm.getSourceOffset(addr) - sources = dw.Line(sourceOffset) - } - builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sources) - if len(cm.listeners) > 0 { - listener = cm.listeners[index] - } - } - return -} - -// CallWithStack implements api.Function. -func (c *callEngine) CallWithStack(ctx context.Context, paramResultStack []uint64) (err error) { - if c.sizeOfParamResultSlice > len(paramResultStack) { - return fmt.Errorf("need %d params, but stack size is %d", c.sizeOfParamResultSlice, len(paramResultStack)) - } - return c.callWithStack(ctx, paramResultStack) -} - -// CallWithStack implements api.Function. -func (c *callEngine) callWithStack(ctx context.Context, paramResultStack []uint64) (err error) { - snapshotEnabled := ctx.Value(expctxkeys.EnableSnapshotterKey{}) != nil - if snapshotEnabled { - ctx = context.WithValue(ctx, expctxkeys.SnapshotterKey{}, c) - } - - if wazevoapi.StackGuardCheckEnabled { - defer func() { - wazevoapi.CheckStackGuardPage(c.stack) - }() - } - - p := c.parent - ensureTermination := p.parent.ensureTermination - m := p.module - if ensureTermination { - select { - case <-ctx.Done(): - // If the provided context is already done, close the module and return the error. - m.CloseWithCtxErr(ctx) - return m.FailIfClosed() - default: - } - } - - var paramResultPtr *uint64 - if len(paramResultStack) > 0 { - paramResultPtr = ¶mResultStack[0] - } - defer func() { - r := recover() - if s, ok := r.(*snapshot); ok { - // A snapshot that wasn't handled was created by a different call engine possibly from a nested wasm invocation, - // let it propagate up to be handled by the caller. - panic(s) - } - if r != nil { - type listenerForAbort struct { - def api.FunctionDefinition - lsn experimental.FunctionListener - } - - var listeners []listenerForAbort - builder := wasmdebug.NewErrorBuilder() - def, lsn := c.addFrame(builder, uintptr(unsafe.Pointer(c.execCtx.goCallReturnAddress))) - if lsn != nil { - listeners = append(listeners, listenerForAbort{def, lsn}) - } - returnAddrs := unwindStack( - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), - c.execCtx.framePointerBeforeGoCall, - c.stackTop, - nil, - ) - for _, retAddr := range returnAddrs[:len(returnAddrs)-1] { // the last return addr is the trampoline, so we skip it. - def, lsn = c.addFrame(builder, retAddr) - if lsn != nil { - listeners = append(listeners, listenerForAbort{def, lsn}) - } - } - err = builder.FromRecovered(r) - - for _, lsn := range listeners { - lsn.lsn.Abort(ctx, m, lsn.def, err) - } - } else { - if err != wasmruntime.ErrRuntimeStackOverflow { // Stackoverflow case shouldn't be panic (to avoid extreme stack unwinding). - err = c.parent.module.FailIfClosed() - } - } - - if err != nil { - // Ensures that we can reuse this callEngine even after an error. 
- c.execCtx.exitCode = wazevoapi.ExitCodeOK - } - }() - - if ensureTermination { - done := m.CloseModuleOnCanceledOrTimeout(ctx) - defer done() - } - - if c.stackTop&(16-1) != 0 { - panic("BUG: stack must be aligned to 16 bytes") - } - entrypoint(c.preambleExecutable, c.executable, c.execCtxPtr, c.parent.opaquePtr, paramResultPtr, c.stackTop) - for { - switch ec := c.execCtx.exitCode; ec & wazevoapi.ExitCodeMask { - case wazevoapi.ExitCodeOK: - return nil - case wazevoapi.ExitCodeGrowStack: - oldsp := uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)) - oldTop := c.stackTop - oldStack := c.stack - var newsp, newfp uintptr - if wazevoapi.StackGuardCheckEnabled { - newsp, newfp, err = c.growStackWithGuarded() - } else { - newsp, newfp, err = c.growStack() - } - if err != nil { - return err - } - adjustClonedStack(oldsp, oldTop, newsp, newfp, c.stackTop) - // Old stack must be alive until the new stack is adjusted. - runtime.KeepAlive(oldStack) - c.execCtx.exitCode = wazevoapi.ExitCodeOK - afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, newsp, newfp) - case wazevoapi.ExitCodeGrowMemory: - mod := c.callerModuleInstance() - mem := mod.MemoryInstance - s := goCallStackView(c.execCtx.stackPointerBeforeGoCall) - argRes := &s[0] - if res, ok := mem.Grow(uint32(*argRes)); !ok { - *argRes = uint64(0xffffffff) // = -1 in signed 32-bit integer. - } else { - *argRes = uint64(res) - } - c.execCtx.exitCode = wazevoapi.ExitCodeOK - afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) - case wazevoapi.ExitCodeTableGrow: - mod := c.callerModuleInstance() - s := goCallStackView(c.execCtx.stackPointerBeforeGoCall) - tableIndex, num, ref := uint32(s[0]), uint32(s[1]), uintptr(s[2]) - table := mod.Tables[tableIndex] - s[0] = uint64(uint32(int32(table.Grow(num, ref)))) - c.execCtx.exitCode = wazevoapi.ExitCodeOK - afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) - case wazevoapi.ExitCodeCallGoFunction: - index := wazevoapi.GoFunctionIndexFromExitCode(ec) - f := hostModuleGoFuncFromOpaque[api.GoFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque) - func() { - if snapshotEnabled { - defer snapshotRecoverFn(c) - } - f.Call(ctx, goCallStackView(c.execCtx.stackPointerBeforeGoCall)) - }() - // Back to the native code. - c.execCtx.exitCode = wazevoapi.ExitCodeOK - afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) - case wazevoapi.ExitCodeCallGoFunctionWithListener: - index := wazevoapi.GoFunctionIndexFromExitCode(ec) - f := hostModuleGoFuncFromOpaque[api.GoFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque) - listeners := hostModuleListenersSliceFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque) - s := goCallStackView(c.execCtx.stackPointerBeforeGoCall) - // Call Listener.Before. - callerModule := c.callerModuleInstance() - listener := listeners[index] - hostModule := hostModuleFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque) - def := hostModule.FunctionDefinition(wasm.Index(index)) - listener.Before(ctx, callerModule, def, s, c.stackIterator(true)) - // Call into the Go function. 
- func() { - if snapshotEnabled { - defer snapshotRecoverFn(c) - } - f.Call(ctx, s) - }() - // Call Listener.After. - listener.After(ctx, callerModule, def, s) - // Back to the native code. - c.execCtx.exitCode = wazevoapi.ExitCodeOK - afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) - case wazevoapi.ExitCodeCallGoModuleFunction: - index := wazevoapi.GoFunctionIndexFromExitCode(ec) - f := hostModuleGoFuncFromOpaque[api.GoModuleFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque) - mod := c.callerModuleInstance() - func() { - if snapshotEnabled { - defer snapshotRecoverFn(c) - } - f.Call(ctx, mod, goCallStackView(c.execCtx.stackPointerBeforeGoCall)) - }() - // Back to the native code. - c.execCtx.exitCode = wazevoapi.ExitCodeOK - afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) - case wazevoapi.ExitCodeCallGoModuleFunctionWithListener: - index := wazevoapi.GoFunctionIndexFromExitCode(ec) - f := hostModuleGoFuncFromOpaque[api.GoModuleFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque) - listeners := hostModuleListenersSliceFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque) - s := goCallStackView(c.execCtx.stackPointerBeforeGoCall) - // Call Listener.Before. - callerModule := c.callerModuleInstance() - listener := listeners[index] - hostModule := hostModuleFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque) - def := hostModule.FunctionDefinition(wasm.Index(index)) - listener.Before(ctx, callerModule, def, s, c.stackIterator(true)) - // Call into the Go function. - func() { - if snapshotEnabled { - defer snapshotRecoverFn(c) - } - f.Call(ctx, callerModule, s) - }() - // Call Listener.After. - listener.After(ctx, callerModule, def, s) - // Back to the native code. - c.execCtx.exitCode = wazevoapi.ExitCodeOK - afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) - case wazevoapi.ExitCodeCallListenerBefore: - stack := goCallStackView(c.execCtx.stackPointerBeforeGoCall) - index := wasm.Index(stack[0]) - mod := c.callerModuleInstance() - listener := mod.Engine.(*moduleEngine).listeners[index] - def := mod.Source.FunctionDefinition(index + mod.Source.ImportFunctionCount) - listener.Before(ctx, mod, def, stack[1:], c.stackIterator(false)) - c.execCtx.exitCode = wazevoapi.ExitCodeOK - afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) - case wazevoapi.ExitCodeCallListenerAfter: - stack := goCallStackView(c.execCtx.stackPointerBeforeGoCall) - index := wasm.Index(stack[0]) - mod := c.callerModuleInstance() - listener := mod.Engine.(*moduleEngine).listeners[index] - def := mod.Source.FunctionDefinition(index + mod.Source.ImportFunctionCount) - listener.After(ctx, mod, def, stack[1:]) - c.execCtx.exitCode = wazevoapi.ExitCodeOK - afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) - case wazevoapi.ExitCodeCheckModuleExitCode: - // Note: this operation must be done in Go, not native code. 
The reason is that - // native code cannot be preempted and that means it can block forever if there are not - // enough OS threads (which we don't have control over). - if err := m.FailIfClosed(); err != nil { - panic(err) - } - c.execCtx.exitCode = wazevoapi.ExitCodeOK - afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) - case wazevoapi.ExitCodeRefFunc: - mod := c.callerModuleInstance() - s := goCallStackView(c.execCtx.stackPointerBeforeGoCall) - funcIndex := wasm.Index(s[0]) - ref := mod.Engine.FunctionInstanceReference(funcIndex) - s[0] = uint64(ref) - c.execCtx.exitCode = wazevoapi.ExitCodeOK - afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) - case wazevoapi.ExitCodeMemoryWait32: - mod := c.callerModuleInstance() - mem := mod.MemoryInstance - if !mem.Shared { - panic(wasmruntime.ErrRuntimeExpectedSharedMemory) - } - - s := goCallStackView(c.execCtx.stackPointerBeforeGoCall) - timeout, exp, addr := int64(s[0]), uint32(s[1]), uintptr(s[2]) - base := uintptr(unsafe.Pointer(&mem.Buffer[0])) - - offset := uint32(addr - base) - res := mem.Wait32(offset, exp, timeout, func(mem *wasm.MemoryInstance, offset uint32) uint32 { - addr := unsafe.Add(unsafe.Pointer(&mem.Buffer[0]), offset) - return atomic.LoadUint32((*uint32)(addr)) - }) - s[0] = res - c.execCtx.exitCode = wazevoapi.ExitCodeOK - afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) - case wazevoapi.ExitCodeMemoryWait64: - mod := c.callerModuleInstance() - mem := mod.MemoryInstance - if !mem.Shared { - panic(wasmruntime.ErrRuntimeExpectedSharedMemory) - } - - s := goCallStackView(c.execCtx.stackPointerBeforeGoCall) - timeout, exp, addr := int64(s[0]), uint64(s[1]), uintptr(s[2]) - base := uintptr(unsafe.Pointer(&mem.Buffer[0])) - - offset := uint32(addr - base) - res := mem.Wait64(offset, exp, timeout, func(mem *wasm.MemoryInstance, offset uint32) uint64 { - addr := unsafe.Add(unsafe.Pointer(&mem.Buffer[0]), offset) - return atomic.LoadUint64((*uint64)(addr)) - }) - s[0] = uint64(res) - c.execCtx.exitCode = wazevoapi.ExitCodeOK - afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) - case wazevoapi.ExitCodeMemoryNotify: - mod := c.callerModuleInstance() - mem := mod.MemoryInstance - - s := goCallStackView(c.execCtx.stackPointerBeforeGoCall) - count, addr := uint32(s[0]), s[1] - offset := uint32(uintptr(addr) - uintptr(unsafe.Pointer(&mem.Buffer[0]))) - res := mem.Notify(offset, count) - s[0] = uint64(res) - c.execCtx.exitCode = wazevoapi.ExitCodeOK - afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) - case wazevoapi.ExitCodeUnreachable: - panic(wasmruntime.ErrRuntimeUnreachable) - case wazevoapi.ExitCodeMemoryOutOfBounds: - panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) - case wazevoapi.ExitCodeTableOutOfBounds: - panic(wasmruntime.ErrRuntimeInvalidTableAccess) - case wazevoapi.ExitCodeIndirectCallNullPointer: - panic(wasmruntime.ErrRuntimeInvalidTableAccess) - case wazevoapi.ExitCodeIndirectCallTypeMismatch: - 
panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch) - case wazevoapi.ExitCodeIntegerOverflow: - panic(wasmruntime.ErrRuntimeIntegerOverflow) - case wazevoapi.ExitCodeIntegerDivisionByZero: - panic(wasmruntime.ErrRuntimeIntegerDivideByZero) - case wazevoapi.ExitCodeInvalidConversionToInteger: - panic(wasmruntime.ErrRuntimeInvalidConversionToInteger) - case wazevoapi.ExitCodeUnalignedAtomic: - panic(wasmruntime.ErrRuntimeUnalignedAtomic) - default: - panic("BUG") - } - } -} - -func (c *callEngine) callerModuleInstance() *wasm.ModuleInstance { - return moduleInstanceFromOpaquePtr(c.execCtx.callerModuleContextPtr) -} - -const callStackCeiling = uintptr(50000000) // in uint64 (8 bytes) == 400000000 bytes in total == 400mb. - -func (c *callEngine) growStackWithGuarded() (newSP uintptr, newFP uintptr, err error) { - if wazevoapi.StackGuardCheckEnabled { - wazevoapi.CheckStackGuardPage(c.stack) - } - newSP, newFP, err = c.growStack() - if err != nil { - return - } - if wazevoapi.StackGuardCheckEnabled { - c.execCtx.stackBottomPtr = &c.stack[wazevoapi.StackGuardCheckGuardPageSize] - } - return -} - -// growStack grows the stack, and returns the new stack pointer. -func (c *callEngine) growStack() (newSP, newFP uintptr, err error) { - currentLen := uintptr(len(c.stack)) - if callStackCeiling < currentLen { - err = wasmruntime.ErrRuntimeStackOverflow - return - } - - newLen := 2*currentLen + c.execCtx.stackGrowRequiredSize + 16 // Stack might be aligned to 16 bytes, so add 16 bytes just in case. - newSP, newFP, c.stackTop, c.stack = c.cloneStack(newLen) - c.execCtx.stackBottomPtr = &c.stack[0] - return -} - -func (c *callEngine) cloneStack(l uintptr) (newSP, newFP, newTop uintptr, newStack []byte) { - newStack = make([]byte, l) - - relSp := c.stackTop - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)) - relFp := c.stackTop - c.execCtx.framePointerBeforeGoCall - - // Copy the existing contents in the previous Go-allocated stack into the new one. - var prevStackAligned, newStackAligned []byte - { - //nolint:staticcheck - sh := (*reflect.SliceHeader)(unsafe.Pointer(&prevStackAligned)) - sh.Data = c.stackTop - relSp - sh.Len = int(relSp) - sh.Cap = int(relSp) - } - newTop = alignedStackTop(newStack) - { - newSP = newTop - relSp - newFP = newTop - relFp - //nolint:staticcheck - sh := (*reflect.SliceHeader)(unsafe.Pointer(&newStackAligned)) - sh.Data = newSP - sh.Len = int(relSp) - sh.Cap = int(relSp) - } - copy(newStackAligned, prevStackAligned) - return -} - -func (c *callEngine) stackIterator(onHostCall bool) experimental.StackIterator { - c.stackIteratorImpl.reset(c, onHostCall) - return &c.stackIteratorImpl -} - -// stackIterator implements experimental.StackIterator. -type stackIterator struct { - retAddrs []uintptr - retAddrCursor int - eng *engine - pc uint64 - - currentDef *wasm.FunctionDefinition -} - -func (si *stackIterator) reset(c *callEngine, onHostCall bool) { - if onHostCall { - si.retAddrs = append(si.retAddrs[:0], uintptr(unsafe.Pointer(c.execCtx.goCallReturnAddress))) - } else { - si.retAddrs = si.retAddrs[:0] - } - si.retAddrs = unwindStack(uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall, c.stackTop, si.retAddrs) - si.retAddrs = si.retAddrs[:len(si.retAddrs)-1] // the last return addr is the trampoline, so we skip it. - si.retAddrCursor = 0 - si.eng = c.parent.parent.parent -} - -// Next implements the same method as documented on experimental.StackIterator. 
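growStack and cloneStack above re-derive the stack and frame pointers from their distance to the aligned stack top after the stack has been reallocated. A standalone sketch of that relocation arithmetic, using made-up integer addresses rather than real pointers:

package main

import "fmt"

// remapToNewTop keeps a pointer at the same distance from the stack top after
// the stack has been reallocated, which is the relocation cloneStack performs
// for both the stack pointer and the frame pointer.
func remapToNewTop(oldTop, oldPtr, newTop uint64) uint64 {
	rel := oldTop - oldPtr // distance from the old aligned top
	return newTop - rel    // same distance from the new aligned top
}

func main() {
	const oldTop, newTop = 0x2000, 0x8000
	fmt.Printf("sp: %#x -> %#x\n", uint64(0x1f40), remapToNewTop(oldTop, 0x1f40, newTop))
	fmt.Printf("fp: %#x -> %#x\n", uint64(0x1f80), remapToNewTop(oldTop, 0x1f80, newTop))
}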
-func (si *stackIterator) Next() bool { - if si.retAddrCursor >= len(si.retAddrs) { - return false - } - - addr := si.retAddrs[si.retAddrCursor] - cm := si.eng.compiledModuleOfAddr(addr) - if cm != nil { - index := cm.functionIndexOf(addr) - def := cm.module.FunctionDefinition(cm.module.ImportFunctionCount + index) - si.currentDef = def - si.retAddrCursor++ - si.pc = uint64(addr) - return true - } - return false -} - -// ProgramCounter implements the same method as documented on experimental.StackIterator. -func (si *stackIterator) ProgramCounter() experimental.ProgramCounter { - return experimental.ProgramCounter(si.pc) -} - -// Function implements the same method as documented on experimental.StackIterator. -func (si *stackIterator) Function() experimental.InternalFunction { - return si -} - -// Definition implements the same method as documented on experimental.InternalFunction. -func (si *stackIterator) Definition() api.FunctionDefinition { - return si.currentDef -} - -// SourceOffsetForPC implements the same method as documented on experimental.InternalFunction. -func (si *stackIterator) SourceOffsetForPC(pc experimental.ProgramCounter) uint64 { - upc := uintptr(pc) - cm := si.eng.compiledModuleOfAddr(upc) - return cm.getSourceOffset(upc) -} - -// snapshot implements experimental.Snapshot -type snapshot struct { - sp, fp, top uintptr - returnAddress *byte - stack []byte - savedRegisters [64][2]uint64 - ret []uint64 - c *callEngine -} - -// Snapshot implements the same method as documented on experimental.Snapshotter. -func (c *callEngine) Snapshot() experimental.Snapshot { - returnAddress := c.execCtx.goCallReturnAddress - oldTop, oldSp := c.stackTop, uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)) - newSP, newFP, newTop, newStack := c.cloneStack(uintptr(len(c.stack)) + 16) - adjustClonedStack(oldSp, oldTop, newSP, newFP, newTop) - return &snapshot{ - sp: newSP, - fp: newFP, - top: newTop, - savedRegisters: c.execCtx.savedRegisters, - returnAddress: returnAddress, - stack: newStack, - c: c, - } -} - -// Restore implements the same method as documented on experimental.Snapshot. -func (s *snapshot) Restore(ret []uint64) { - s.ret = ret - panic(s) -} - -func (s *snapshot) doRestore() { - spp := *(**uint64)(unsafe.Pointer(&s.sp)) - view := goCallStackView(spp) - copy(view, s.ret) - - c := s.c - c.stack = s.stack - c.stackTop = s.top - ec := &c.execCtx - ec.stackBottomPtr = &c.stack[0] - ec.stackPointerBeforeGoCall = spp - ec.framePointerBeforeGoCall = s.fp - ec.goCallReturnAddress = s.returnAddress - ec.savedRegisters = s.savedRegisters -} - -// Error implements the same method on error. 
-func (s *snapshot) Error() string { - return "unhandled snapshot restore, this generally indicates restore was called from a different " + - "exported function invocation than snapshot" -} - -func snapshotRecoverFn(c *callEngine) { - if r := recover(); r != nil { - if s, ok := r.(*snapshot); ok && s.c == c { - s.doRestore() - } else { - panic(r) - } - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine.go deleted file mode 100644 index a6df3e7e7..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine.go +++ /dev/null @@ -1,843 +0,0 @@ -package wazevo - -import ( - "context" - "encoding/hex" - "errors" - "fmt" - "runtime" - "sort" - "sync" - "unsafe" - - "github.com/tetratelabs/wazero/api" - "github.com/tetratelabs/wazero/experimental" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/frontend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" - "github.com/tetratelabs/wazero/internal/filecache" - "github.com/tetratelabs/wazero/internal/platform" - "github.com/tetratelabs/wazero/internal/version" - "github.com/tetratelabs/wazero/internal/wasm" -) - -type ( - // engine implements wasm.Engine. - engine struct { - wazeroVersion string - fileCache filecache.Cache - compiledModules map[wasm.ModuleID]*compiledModule - // sortedCompiledModules is a list of compiled modules sorted by the initial address of the executable. - sortedCompiledModules []*compiledModule - mux sync.RWMutex - // sharedFunctions is compiled functions shared by all modules. - sharedFunctions *sharedFunctions - // setFinalizer defaults to runtime.SetFinalizer, but overridable for tests. - setFinalizer func(obj interface{}, finalizer interface{}) - - // The followings are reused for compiling shared functions. - machine backend.Machine - be backend.Compiler - } - - sharedFunctions struct { - // memoryGrowExecutable is a compiled trampoline executable for memory.grow builtin function. - memoryGrowExecutable []byte - // checkModuleExitCode is a compiled trampoline executable for checking module instance exit code. This - // is used when ensureTermination is true. - checkModuleExitCode []byte - // stackGrowExecutable is a compiled executable for growing stack builtin function. - stackGrowExecutable []byte - // tableGrowExecutable is a compiled trampoline executable for table.grow builtin function. - tableGrowExecutable []byte - // refFuncExecutable is a compiled trampoline executable for ref.func builtin function. - refFuncExecutable []byte - // memoryWait32Executable is a compiled trampoline executable for memory.wait32 builtin function - memoryWait32Executable []byte - // memoryWait64Executable is a compiled trampoline executable for memory.wait64 builtin function - memoryWait64Executable []byte - // memoryNotifyExecutable is a compiled trampoline executable for memory.notify builtin function - memoryNotifyExecutable []byte - listenerBeforeTrampolines map[*wasm.FunctionType][]byte - listenerAfterTrampolines map[*wasm.FunctionType][]byte - } - - // compiledModule is a compiled variant of a wasm.Module and ready to be used for instantiation. - compiledModule struct { - *executables - // functionOffsets maps a local function index to the offset in the executable. 
- functionOffsets []int - parent *engine - module *wasm.Module - ensureTermination bool - listeners []experimental.FunctionListener - listenerBeforeTrampolines []*byte - listenerAfterTrampolines []*byte - - // The followings are only available for non host modules. - - offsets wazevoapi.ModuleContextOffsetData - sharedFunctions *sharedFunctions - sourceMap sourceMap - } - - executables struct { - executable []byte - entryPreambles [][]byte - } -) - -// sourceMap is a mapping from the offset of the executable to the offset of the original wasm binary. -type sourceMap struct { - // executableOffsets is a sorted list of offsets of the executable. This is index-correlated with wasmBinaryOffsets, - // in other words executableOffsets[i] is the offset of the executable which corresponds to the offset of a Wasm - // binary pointed by wasmBinaryOffsets[i]. - executableOffsets []uintptr - // wasmBinaryOffsets is the counterpart of executableOffsets. - wasmBinaryOffsets []uint64 -} - -var _ wasm.Engine = (*engine)(nil) - -// NewEngine returns the implementation of wasm.Engine. -func NewEngine(ctx context.Context, _ api.CoreFeatures, fc filecache.Cache) wasm.Engine { - machine := newMachine() - be := backend.NewCompiler(ctx, machine, ssa.NewBuilder()) - e := &engine{ - compiledModules: make(map[wasm.ModuleID]*compiledModule), - setFinalizer: runtime.SetFinalizer, - machine: machine, - be: be, - fileCache: fc, - wazeroVersion: version.GetWazeroVersion(), - } - e.compileSharedFunctions() - return e -} - -// CompileModule implements wasm.Engine. -func (e *engine) CompileModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (err error) { - if wazevoapi.PerfMapEnabled { - wazevoapi.PerfMap.Lock() - defer wazevoapi.PerfMap.Unlock() - } - - if _, ok, err := e.getCompiledModule(module, listeners, ensureTermination); ok { // cache hit! 
- return nil - } else if err != nil { - return err - } - - if wazevoapi.DeterministicCompilationVerifierEnabled { - ctx = wazevoapi.NewDeterministicCompilationVerifierContext(ctx, len(module.CodeSection)) - } - cm, err := e.compileModule(ctx, module, listeners, ensureTermination) - if err != nil { - return err - } - if err = e.addCompiledModule(module, cm); err != nil { - return err - } - - if wazevoapi.DeterministicCompilationVerifierEnabled { - for i := 0; i < wazevoapi.DeterministicCompilationVerifyingIter; i++ { - _, err := e.compileModule(ctx, module, listeners, ensureTermination) - if err != nil { - return err - } - } - } - - if len(listeners) > 0 { - cm.listeners = listeners - cm.listenerBeforeTrampolines = make([]*byte, len(module.TypeSection)) - cm.listenerAfterTrampolines = make([]*byte, len(module.TypeSection)) - for i := range module.TypeSection { - typ := &module.TypeSection[i] - before, after := e.getListenerTrampolineForType(typ) - cm.listenerBeforeTrampolines[i] = before - cm.listenerAfterTrampolines[i] = after - } - } - return nil -} - -func (exec *executables) compileEntryPreambles(m *wasm.Module, machine backend.Machine, be backend.Compiler) { - exec.entryPreambles = make([][]byte, len(m.TypeSection)) - for i := range m.TypeSection { - typ := &m.TypeSection[i] - sig := frontend.SignatureForWasmFunctionType(typ) - be.Init() - buf := machine.CompileEntryPreamble(&sig) - executable := mmapExecutable(buf) - exec.entryPreambles[i] = executable - - if wazevoapi.PerfMapEnabled { - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&executable[0])), - uint64(len(executable)), fmt.Sprintf("entry_preamble::type=%s", typ.String())) - } - } -} - -func (e *engine) compileModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (*compiledModule, error) { - withListener := len(listeners) > 0 - cm := &compiledModule{ - offsets: wazevoapi.NewModuleContextOffsetData(module, withListener), parent: e, module: module, - ensureTermination: ensureTermination, - executables: &executables{}, - } - - if module.IsHostModule { - return e.compileHostModule(ctx, module, listeners) - } - - importedFns, localFns := int(module.ImportFunctionCount), len(module.FunctionSection) - if localFns == 0 { - return cm, nil - } - - rels := make([]backend.RelocationInfo, 0) - refToBinaryOffset := make([]int, importedFns+localFns) - - if wazevoapi.DeterministicCompilationVerifierEnabled { - // The compilation must be deterministic regardless of the order of functions being compiled. - wazevoapi.DeterministicCompilationVerifierRandomizeIndexes(ctx) - } - - needSourceInfo := module.DWARFLines != nil - - // Creates new compiler instances which are reused for each function. - ssaBuilder := ssa.NewBuilder() - fe := frontend.NewFrontendCompiler(module, ssaBuilder, &cm.offsets, ensureTermination, withListener, needSourceInfo) - machine := newMachine() - be := backend.NewCompiler(ctx, machine, ssaBuilder) - - cm.executables.compileEntryPreambles(module, machine, be) - - totalSize := 0 // Total binary size of the executable. - cm.functionOffsets = make([]int, localFns) - bodies := make([][]byte, localFns) - - // Trampoline relocation related variables. - trampolineInterval, callTrampolineIslandSize, err := machine.CallTrampolineIslandInfo(localFns) - if err != nil { - return nil, err - } - needCallTrampoline := callTrampolineIslandSize > 0 - var callTrampolineIslandOffsets []int // Holds the offsets of trampoline islands. 
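The loop that follows interleaves trampoline islands with the function bodies: each body is 16-byte aligned, and whenever the accumulated code size crosses another multiple of the interval reported by the backend, an island is reserved at the current offset. A simplified, standalone sketch of that spacing rule (the interval, island size, and body sizes are made up):

package main

import "fmt"

// islandOffsets mirrors the spacing rule used when laying out function bodies:
// bodies are 16-byte aligned, and once the running size crosses another
// multiple of interval, an island of islandSize bytes is reserved.
func islandOffsets(bodySizes []int, interval, islandSize int) (offsets []int) {
	total := 0
	for _, sz := range bodySizes {
		total = (total + 15) &^ 15 // align each body to 16 bytes
		total += sz
		if total/interval > len(offsets) {
			offsets = append(offsets, total)
			total += islandSize
		}
	}
	return offsets
}

func main() {
	fmt.Println(islandOffsets([]int{100, 200, 5000, 60, 9000}, 4096, 32)) // [5320 14424]
}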
- - for i := range module.CodeSection { - if wazevoapi.DeterministicCompilationVerifierEnabled { - i = wazevoapi.DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex(ctx, i) - } - - fidx := wasm.Index(i + importedFns) - - if wazevoapi.NeedFunctionNameInContext { - def := module.FunctionDefinition(fidx) - name := def.DebugName() - if len(def.ExportNames()) > 0 { - name = def.ExportNames()[0] - } - ctx = wazevoapi.SetCurrentFunctionName(ctx, i, fmt.Sprintf("[%d/%d]%s", i, len(module.CodeSection)-1, name)) - } - - needListener := len(listeners) > 0 && listeners[i] != nil - body, relsPerFunc, err := e.compileLocalWasmFunction(ctx, module, wasm.Index(i), fe, ssaBuilder, be, needListener) - if err != nil { - return nil, fmt.Errorf("compile function %d/%d: %v", i, len(module.CodeSection)-1, err) - } - - // Align 16-bytes boundary. - totalSize = (totalSize + 15) &^ 15 - cm.functionOffsets[i] = totalSize - - if needSourceInfo { - // At the beginning of the function, we add the offset of the function body so that - // we can resolve the source location of the call site of before listener call. - cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(totalSize)) - cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, module.CodeSection[i].BodyOffsetInCodeSection) - - for _, info := range be.SourceOffsetInfo() { - cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(totalSize)+uintptr(info.ExecutableOffset)) - cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, uint64(info.SourceOffset)) - } - } - - fref := frontend.FunctionIndexToFuncRef(fidx) - refToBinaryOffset[fref] = totalSize - - // At this point, relocation offsets are relative to the start of the function body, - // so we adjust it to the start of the executable. - for _, r := range relsPerFunc { - r.Offset += int64(totalSize) - rels = append(rels, r) - } - - bodies[i] = body - totalSize += len(body) - if wazevoapi.PrintMachineCodeHexPerFunction { - fmt.Printf("[[[machine code for %s]]]\n%s\n\n", wazevoapi.GetCurrentFunctionName(ctx), hex.EncodeToString(body)) - } - - if needCallTrampoline { - // If the total size exceeds the trampoline interval, we need to add a trampoline island. - if totalSize/trampolineInterval > len(callTrampolineIslandOffsets) { - callTrampolineIslandOffsets = append(callTrampolineIslandOffsets, totalSize) - totalSize += callTrampolineIslandSize - } - } - } - - // Allocate executable memory and then copy the generated machine code. - executable, err := platform.MmapCodeSegment(totalSize) - if err != nil { - panic(err) - } - cm.executable = executable - - for i, b := range bodies { - offset := cm.functionOffsets[i] - copy(executable[offset:], b) - } - - if wazevoapi.PerfMapEnabled { - wazevoapi.PerfMap.Flush(uintptr(unsafe.Pointer(&executable[0])), cm.functionOffsets) - } - - if needSourceInfo { - for i := range cm.sourceMap.executableOffsets { - cm.sourceMap.executableOffsets[i] += uintptr(unsafe.Pointer(&cm.executable[0])) - } - } - - // Resolve relocations for local function calls. - if len(rels) > 0 { - machine.ResolveRelocations(refToBinaryOffset, importedFns, executable, rels, callTrampolineIslandOffsets) - } - - if runtime.GOARCH == "arm64" { - // On arm64, we cannot give all of rwx at the same time, so we change it to exec. 
- if err = platform.MprotectRX(executable); err != nil { - return nil, err - } - } - cm.sharedFunctions = e.sharedFunctions - e.setFinalizer(cm.executables, executablesFinalizer) - return cm, nil -} - -func (e *engine) compileLocalWasmFunction( - ctx context.Context, - module *wasm.Module, - localFunctionIndex wasm.Index, - fe *frontend.Compiler, - ssaBuilder ssa.Builder, - be backend.Compiler, - needListener bool, -) (body []byte, rels []backend.RelocationInfo, err error) { - typIndex := module.FunctionSection[localFunctionIndex] - typ := &module.TypeSection[typIndex] - codeSeg := &module.CodeSection[localFunctionIndex] - - // Initializes both frontend and backend compilers. - fe.Init(localFunctionIndex, typIndex, typ, codeSeg.LocalTypes, codeSeg.Body, needListener, codeSeg.BodyOffsetInCodeSection) - be.Init() - - // Lower Wasm to SSA. - fe.LowerToSSA() - if wazevoapi.PrintSSA && wazevoapi.PrintEnabledIndex(ctx) { - fmt.Printf("[[[SSA for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), ssaBuilder.Format()) - } - - if wazevoapi.DeterministicCompilationVerifierEnabled { - wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "SSA", ssaBuilder.Format()) - } - - // Run SSA-level optimization passes. - ssaBuilder.RunPasses() - - if wazevoapi.PrintOptimizedSSA && wazevoapi.PrintEnabledIndex(ctx) { - fmt.Printf("[[[Optimized SSA for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), ssaBuilder.Format()) - } - - if wazevoapi.DeterministicCompilationVerifierEnabled { - wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "Optimized SSA", ssaBuilder.Format()) - } - - // Now our ssaBuilder contains the necessary information to further lower them to - // machine code. - original, rels, err := be.Compile(ctx) - if err != nil { - return nil, nil, fmt.Errorf("ssa->machine code: %v", err) - } - - // TODO: optimize as zero copy. - copied := make([]byte, len(original)) - copy(copied, original) - return copied, rels, nil -} - -func (e *engine) compileHostModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener) (*compiledModule, error) { - machine := newMachine() - be := backend.NewCompiler(ctx, machine, ssa.NewBuilder()) - - num := len(module.CodeSection) - cm := &compiledModule{module: module, listeners: listeners, executables: &executables{}} - cm.functionOffsets = make([]int, num) - totalSize := 0 // Total binary size of the executable. - bodies := make([][]byte, num) - var sig ssa.Signature - for i := range module.CodeSection { - totalSize = (totalSize + 15) &^ 15 - cm.functionOffsets[i] = totalSize - - typIndex := module.FunctionSection[i] - typ := &module.TypeSection[typIndex] - - // We can relax until the index fits together in ExitCode as we do in wazevoapi.ExitCodeCallGoModuleFunctionWithIndex. - // However, 1 << 16 should be large enough for a real use case. - const hostFunctionNumMaximum = 1 << 16 - if i >= hostFunctionNumMaximum { - return nil, fmt.Errorf("too many host functions (maximum %d)", hostFunctionNumMaximum) - } - - sig.ID = ssa.SignatureID(typIndex) // This is important since we reuse the `machine` which caches the ABI based on the SignatureID. - sig.Params = append(sig.Params[:0], - ssa.TypeI64, // First argument must be exec context. - ssa.TypeI64, // The second argument is the moduleContextOpaque of this host module. 
- ) - for _, t := range typ.Params { - sig.Params = append(sig.Params, frontend.WasmTypeToSSAType(t)) - } - - sig.Results = sig.Results[:0] - for _, t := range typ.Results { - sig.Results = append(sig.Results, frontend.WasmTypeToSSAType(t)) - } - - c := &module.CodeSection[i] - if c.GoFunc == nil { - panic("BUG: GoFunc must be set for host module") - } - - withListener := len(listeners) > 0 && listeners[i] != nil - var exitCode wazevoapi.ExitCode - fn := c.GoFunc - switch fn.(type) { - case api.GoModuleFunction: - exitCode = wazevoapi.ExitCodeCallGoModuleFunctionWithIndex(i, withListener) - case api.GoFunction: - exitCode = wazevoapi.ExitCodeCallGoFunctionWithIndex(i, withListener) - } - - be.Init() - machine.CompileGoFunctionTrampoline(exitCode, &sig, true) - if err := be.Finalize(ctx); err != nil { - return nil, err - } - body := be.Buf() - - if wazevoapi.PerfMapEnabled { - name := module.FunctionDefinition(wasm.Index(i)).DebugName() - wazevoapi.PerfMap.AddModuleEntry(i, - int64(totalSize), - uint64(len(body)), - fmt.Sprintf("trampoline:%s", name)) - } - - // TODO: optimize as zero copy. - copied := make([]byte, len(body)) - copy(copied, body) - bodies[i] = copied - totalSize += len(body) - } - - if totalSize == 0 { - // Empty module. - return cm, nil - } - - // Allocate executable memory and then copy the generated machine code. - executable, err := platform.MmapCodeSegment(totalSize) - if err != nil { - panic(err) - } - cm.executable = executable - - for i, b := range bodies { - offset := cm.functionOffsets[i] - copy(executable[offset:], b) - } - - if wazevoapi.PerfMapEnabled { - wazevoapi.PerfMap.Flush(uintptr(unsafe.Pointer(&executable[0])), cm.functionOffsets) - } - - if runtime.GOARCH == "arm64" { - // On arm64, we cannot give all of rwx at the same time, so we change it to exec. - if err = platform.MprotectRX(executable); err != nil { - return nil, err - } - } - e.setFinalizer(cm.executables, executablesFinalizer) - return cm, nil -} - -// Close implements wasm.Engine. -func (e *engine) Close() (err error) { - e.mux.Lock() - defer e.mux.Unlock() - e.sortedCompiledModules = nil - e.compiledModules = nil - e.sharedFunctions = nil - return nil -} - -// CompiledModuleCount implements wasm.Engine. -func (e *engine) CompiledModuleCount() uint32 { - e.mux.RLock() - defer e.mux.RUnlock() - return uint32(len(e.compiledModules)) -} - -// DeleteCompiledModule implements wasm.Engine. 
-func (e *engine) DeleteCompiledModule(m *wasm.Module) { - e.mux.Lock() - defer e.mux.Unlock() - cm, ok := e.compiledModules[m.ID] - if ok { - if len(cm.executable) > 0 { - e.deleteCompiledModuleFromSortedList(cm) - } - delete(e.compiledModules, m.ID) - } -} - -func (e *engine) addCompiledModuleToSortedList(cm *compiledModule) { - ptr := uintptr(unsafe.Pointer(&cm.executable[0])) - - index := sort.Search(len(e.sortedCompiledModules), func(i int) bool { - return uintptr(unsafe.Pointer(&e.sortedCompiledModules[i].executable[0])) >= ptr - }) - e.sortedCompiledModules = append(e.sortedCompiledModules, nil) - copy(e.sortedCompiledModules[index+1:], e.sortedCompiledModules[index:]) - e.sortedCompiledModules[index] = cm -} - -func (e *engine) deleteCompiledModuleFromSortedList(cm *compiledModule) { - ptr := uintptr(unsafe.Pointer(&cm.executable[0])) - - index := sort.Search(len(e.sortedCompiledModules), func(i int) bool { - return uintptr(unsafe.Pointer(&e.sortedCompiledModules[i].executable[0])) >= ptr - }) - if index >= len(e.sortedCompiledModules) { - return - } - copy(e.sortedCompiledModules[index:], e.sortedCompiledModules[index+1:]) - e.sortedCompiledModules = e.sortedCompiledModules[:len(e.sortedCompiledModules)-1] -} - -func (e *engine) compiledModuleOfAddr(addr uintptr) *compiledModule { - e.mux.RLock() - defer e.mux.RUnlock() - - index := sort.Search(len(e.sortedCompiledModules), func(i int) bool { - return uintptr(unsafe.Pointer(&e.sortedCompiledModules[i].executable[0])) > addr - }) - index -= 1 - if index < 0 { - return nil - } - candidate := e.sortedCompiledModules[index] - if checkAddrInBytes(addr, candidate.executable) { - // If a module is already deleted, the found module may have been wrong. - return candidate - } - return nil -} - -func checkAddrInBytes(addr uintptr, b []byte) bool { - return uintptr(unsafe.Pointer(&b[0])) <= addr && addr <= uintptr(unsafe.Pointer(&b[len(b)-1])) -} - -// NewModuleEngine implements wasm.Engine. -func (e *engine) NewModuleEngine(m *wasm.Module, mi *wasm.ModuleInstance) (wasm.ModuleEngine, error) { - me := &moduleEngine{} - - // Note: imported functions are resolved in moduleEngine.ResolveImportedFunction. 
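compiledModuleOfAddr above resolves a code address to its owning module by binary-searching a list kept sorted by executable base address, then checking that the address actually falls inside the candidate's range. A self-contained sketch of the same lookup pattern over made-up address ranges:

package main

import (
	"fmt"
	"sort"
)

// span models an executable mapping by its start address and length.
type span struct{ start, size uintptr }

// findSpan returns the index of the span containing addr, or -1. The spans
// must be sorted by start address; sort.Search locates the first span whose
// start is beyond addr, so the candidate is the one immediately before it.
func findSpan(spans []span, addr uintptr) int {
	i := sort.Search(len(spans), func(i int) bool { return spans[i].start > addr })
	i--
	if i < 0 {
		return -1
	}
	if s := spans[i]; addr < s.start+s.size {
		return i
	}
	return -1
}

func main() {
	spans := []span{{0x1000, 0x100}, {0x4000, 0x200}, {0x9000, 0x80}}
	fmt.Println(findSpan(spans, 0x40f0)) // 1
	fmt.Println(findSpan(spans, 0x3000)) // -1
}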
- me.importedFunctions = make([]importedFunction, m.ImportFunctionCount) - - compiled, ok := e.getCompiledModuleFromMemory(m) - if !ok { - return nil, errors.New("source module must be compiled before instantiation") - } - me.parent = compiled - me.module = mi - me.listeners = compiled.listeners - - if m.IsHostModule { - me.opaque = buildHostModuleOpaque(m, compiled.listeners) - me.opaquePtr = &me.opaque[0] - } else { - if size := compiled.offsets.TotalSize; size != 0 { - opaque := newAlignedOpaque(size) - me.opaque = opaque - me.opaquePtr = &opaque[0] - } - } - return me, nil -} - -func (e *engine) compileSharedFunctions() { - e.sharedFunctions = &sharedFunctions{ - listenerBeforeTrampolines: make(map[*wasm.FunctionType][]byte), - listenerAfterTrampolines: make(map[*wasm.FunctionType][]byte), - } - - e.be.Init() - { - src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeGrowMemory, &ssa.Signature{ - Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32}, - Results: []ssa.Type{ssa.TypeI32}, - }, false) - e.sharedFunctions.memoryGrowExecutable = mmapExecutable(src) - if wazevoapi.PerfMapEnabled { - exe := e.sharedFunctions.memoryGrowExecutable - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_grow_trampoline") - } - } - - e.be.Init() - { - src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeTableGrow, &ssa.Signature{ - Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* table index */, ssa.TypeI32 /* num */, ssa.TypeI64 /* ref */}, - Results: []ssa.Type{ssa.TypeI32}, - }, false) - e.sharedFunctions.tableGrowExecutable = mmapExecutable(src) - if wazevoapi.PerfMapEnabled { - exe := e.sharedFunctions.tableGrowExecutable - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "table_grow_trampoline") - } - } - - e.be.Init() - { - src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCheckModuleExitCode, &ssa.Signature{ - Params: []ssa.Type{ssa.TypeI32 /* exec context */}, - Results: []ssa.Type{ssa.TypeI32}, - }, false) - e.sharedFunctions.checkModuleExitCode = mmapExecutable(src) - if wazevoapi.PerfMapEnabled { - exe := e.sharedFunctions.checkModuleExitCode - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "check_module_exit_code_trampoline") - } - } - - e.be.Init() - { - src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeRefFunc, &ssa.Signature{ - Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* function index */}, - Results: []ssa.Type{ssa.TypeI64}, // returns the function reference. - }, false) - e.sharedFunctions.refFuncExecutable = mmapExecutable(src) - if wazevoapi.PerfMapEnabled { - exe := e.sharedFunctions.refFuncExecutable - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "ref_func_trampoline") - } - } - - e.be.Init() - { - src := e.machine.CompileStackGrowCallSequence() - e.sharedFunctions.stackGrowExecutable = mmapExecutable(src) - if wazevoapi.PerfMapEnabled { - exe := e.sharedFunctions.stackGrowExecutable - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "stack_grow_trampoline") - } - } - - e.be.Init() - { - src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait32, &ssa.Signature{ - // exec context, timeout, expected, addr - Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI32, ssa.TypeI64}, - // Returns the status. 
- Results: []ssa.Type{ssa.TypeI32}, - }, false) - e.sharedFunctions.memoryWait32Executable = mmapExecutable(src) - if wazevoapi.PerfMapEnabled { - exe := e.sharedFunctions.memoryWait32Executable - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_wait32_trampoline") - } - } - - e.be.Init() - { - src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait64, &ssa.Signature{ - // exec context, timeout, expected, addr - Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64, ssa.TypeI64}, - // Returns the status. - Results: []ssa.Type{ssa.TypeI32}, - }, false) - e.sharedFunctions.memoryWait64Executable = mmapExecutable(src) - if wazevoapi.PerfMapEnabled { - exe := e.sharedFunctions.memoryWait64Executable - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_wait64_trampoline") - } - } - - e.be.Init() - { - src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryNotify, &ssa.Signature{ - // exec context, count, addr - Params: []ssa.Type{ssa.TypeI64, ssa.TypeI32, ssa.TypeI64}, - // Returns the number notified. - Results: []ssa.Type{ssa.TypeI32}, - }, false) - e.sharedFunctions.memoryNotifyExecutable = mmapExecutable(src) - if wazevoapi.PerfMapEnabled { - exe := e.sharedFunctions.memoryNotifyExecutable - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_notify_trampoline") - } - } - - e.setFinalizer(e.sharedFunctions, sharedFunctionsFinalizer) -} - -func sharedFunctionsFinalizer(sf *sharedFunctions) { - if err := platform.MunmapCodeSegment(sf.memoryGrowExecutable); err != nil { - panic(err) - } - if err := platform.MunmapCodeSegment(sf.checkModuleExitCode); err != nil { - panic(err) - } - if err := platform.MunmapCodeSegment(sf.stackGrowExecutable); err != nil { - panic(err) - } - if err := platform.MunmapCodeSegment(sf.tableGrowExecutable); err != nil { - panic(err) - } - if err := platform.MunmapCodeSegment(sf.refFuncExecutable); err != nil { - panic(err) - } - if err := platform.MunmapCodeSegment(sf.memoryWait32Executable); err != nil { - panic(err) - } - if err := platform.MunmapCodeSegment(sf.memoryWait64Executable); err != nil { - panic(err) - } - if err := platform.MunmapCodeSegment(sf.memoryNotifyExecutable); err != nil { - panic(err) - } - for _, f := range sf.listenerBeforeTrampolines { - if err := platform.MunmapCodeSegment(f); err != nil { - panic(err) - } - } - for _, f := range sf.listenerAfterTrampolines { - if err := platform.MunmapCodeSegment(f); err != nil { - panic(err) - } - } - - sf.memoryGrowExecutable = nil - sf.checkModuleExitCode = nil - sf.stackGrowExecutable = nil - sf.tableGrowExecutable = nil - sf.refFuncExecutable = nil - sf.memoryWait32Executable = nil - sf.memoryWait64Executable = nil - sf.memoryNotifyExecutable = nil - sf.listenerBeforeTrampolines = nil - sf.listenerAfterTrampolines = nil -} - -func executablesFinalizer(exec *executables) { - if len(exec.executable) > 0 { - if err := platform.MunmapCodeSegment(exec.executable); err != nil { - panic(err) - } - } - exec.executable = nil - - for _, f := range exec.entryPreambles { - if err := platform.MunmapCodeSegment(f); err != nil { - panic(err) - } - } - exec.entryPreambles = nil -} - -func mmapExecutable(src []byte) []byte { - executable, err := platform.MmapCodeSegment(len(src)) - if err != nil { - panic(err) - } - - copy(executable, src) - - if runtime.GOARCH == "arm64" { - // On arm64, we cannot give all of rwx at the same time, so we change it to exec. 
- if err = platform.MprotectRX(executable); err != nil { - panic(err) - } - } - return executable -} - -func (cm *compiledModule) functionIndexOf(addr uintptr) wasm.Index { - addr -= uintptr(unsafe.Pointer(&cm.executable[0])) - offset := cm.functionOffsets - index := sort.Search(len(offset), func(i int) bool { - return offset[i] > int(addr) - }) - index-- - if index < 0 { - panic("BUG") - } - return wasm.Index(index) -} - -func (e *engine) getListenerTrampolineForType(functionType *wasm.FunctionType) (before, after *byte) { - e.mux.Lock() - defer e.mux.Unlock() - - beforeBuf, ok := e.sharedFunctions.listenerBeforeTrampolines[functionType] - afterBuf := e.sharedFunctions.listenerAfterTrampolines[functionType] - if ok { - return &beforeBuf[0], &afterBuf[0] - } - - beforeSig, afterSig := frontend.SignatureForListener(functionType) - - e.be.Init() - buf := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerBefore, beforeSig, false) - beforeBuf = mmapExecutable(buf) - - e.be.Init() - buf = e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerAfter, afterSig, false) - afterBuf = mmapExecutable(buf) - - e.sharedFunctions.listenerBeforeTrampolines[functionType] = beforeBuf - e.sharedFunctions.listenerAfterTrampolines[functionType] = afterBuf - return &beforeBuf[0], &afterBuf[0] -} - -func (cm *compiledModule) getSourceOffset(pc uintptr) uint64 { - offsets := cm.sourceMap.executableOffsets - if len(offsets) == 0 { - return 0 - } - - index := sort.Search(len(offsets), func(i int) bool { - return offsets[i] >= pc - }) - - index-- - if index < 0 { - return 0 - } - return cm.sourceMap.wasmBinaryOffsets[index] -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go deleted file mode 100644 index e49353dc8..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go +++ /dev/null @@ -1,303 +0,0 @@ -package wazevo - -import ( - "bytes" - "context" - "crypto/sha256" - "encoding/binary" - "fmt" - "hash/crc32" - "io" - "runtime" - "unsafe" - - "github.com/tetratelabs/wazero/experimental" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" - "github.com/tetratelabs/wazero/internal/filecache" - "github.com/tetratelabs/wazero/internal/platform" - "github.com/tetratelabs/wazero/internal/u32" - "github.com/tetratelabs/wazero/internal/u64" - "github.com/tetratelabs/wazero/internal/wasm" -) - -var crc = crc32.MakeTable(crc32.Castagnoli) - -// fileCacheKey returns a key for the file cache. -// In order to avoid collisions with the existing compiler, we do not use m.ID directly, -// but instead we rehash it with magic. -func fileCacheKey(m *wasm.Module) (ret filecache.Key) { - s := sha256.New() - s.Write(m.ID[:]) - s.Write(magic) - // Write the CPU features so that we can cache the compiled module for the same CPU. - // This prevents the incompatible CPU features from being used. - cpu := platform.CpuFeatures.Raw() - // Reuse the `ret` buffer to write the first 8 bytes of the CPU features so that we can avoid the allocation. - binary.LittleEndian.PutUint64(ret[:8], cpu) - s.Write(ret[:8]) - // Finally, write the hash to the ret buffer. 
- s.Sum(ret[:0]) - return -} - -func (e *engine) addCompiledModule(module *wasm.Module, cm *compiledModule) (err error) { - e.addCompiledModuleToMemory(module, cm) - if !module.IsHostModule && e.fileCache != nil { - err = e.addCompiledModuleToCache(module, cm) - } - return -} - -func (e *engine) getCompiledModule(module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (cm *compiledModule, ok bool, err error) { - cm, ok = e.getCompiledModuleFromMemory(module) - if ok { - return - } - cm, ok, err = e.getCompiledModuleFromCache(module) - if ok { - cm.parent = e - cm.module = module - cm.sharedFunctions = e.sharedFunctions - cm.ensureTermination = ensureTermination - cm.offsets = wazevoapi.NewModuleContextOffsetData(module, len(listeners) > 0) - if len(listeners) > 0 { - cm.listeners = listeners - cm.listenerBeforeTrampolines = make([]*byte, len(module.TypeSection)) - cm.listenerAfterTrampolines = make([]*byte, len(module.TypeSection)) - for i := range module.TypeSection { - typ := &module.TypeSection[i] - before, after := e.getListenerTrampolineForType(typ) - cm.listenerBeforeTrampolines[i] = before - cm.listenerAfterTrampolines[i] = after - } - } - e.addCompiledModuleToMemory(module, cm) - ssaBuilder := ssa.NewBuilder() - machine := newMachine() - be := backend.NewCompiler(context.Background(), machine, ssaBuilder) - cm.executables.compileEntryPreambles(module, machine, be) - - // Set the finalizer. - e.setFinalizer(cm.executables, executablesFinalizer) - } - return -} - -func (e *engine) addCompiledModuleToMemory(m *wasm.Module, cm *compiledModule) { - e.mux.Lock() - defer e.mux.Unlock() - e.compiledModules[m.ID] = cm - if len(cm.executable) > 0 { - e.addCompiledModuleToSortedList(cm) - } -} - -func (e *engine) getCompiledModuleFromMemory(module *wasm.Module) (cm *compiledModule, ok bool) { - e.mux.RLock() - defer e.mux.RUnlock() - cm, ok = e.compiledModules[module.ID] - return -} - -func (e *engine) addCompiledModuleToCache(module *wasm.Module, cm *compiledModule) (err error) { - if e.fileCache == nil || module.IsHostModule { - return - } - err = e.fileCache.Add(fileCacheKey(module), serializeCompiledModule(e.wazeroVersion, cm)) - return -} - -func (e *engine) getCompiledModuleFromCache(module *wasm.Module) (cm *compiledModule, hit bool, err error) { - if e.fileCache == nil || module.IsHostModule { - return - } - - // Check if the entries exist in the external cache. - var cached io.ReadCloser - cached, hit, err = e.fileCache.Get(fileCacheKey(module)) - if !hit || err != nil { - return - } - - // Otherwise, we hit the cache on external cache. - // We retrieve *code structures from `cached`. - var staleCache bool - // Note: cached.Close is ensured to be called in deserializeCodes. - cm, staleCache, err = deserializeCompiledModule(e.wazeroVersion, cached) - if err != nil { - hit = false - return - } else if staleCache { - return nil, false, e.fileCache.Delete(fileCacheKey(module)) - } - return -} - -var magic = []byte{'W', 'A', 'Z', 'E', 'V', 'O'} - -func serializeCompiledModule(wazeroVersion string, cm *compiledModule) io.Reader { - buf := bytes.NewBuffer(nil) - // First 6 byte: WAZEVO header. - buf.Write(magic) - // Next 1 byte: length of version: - buf.WriteByte(byte(len(wazeroVersion))) - // Version of wazero. - buf.WriteString(wazeroVersion) - // Number of *code (== locally defined functions in the module): 4 bytes. 
- buf.Write(u32.LeBytes(uint32(len(cm.functionOffsets)))) - for _, offset := range cm.functionOffsets { - // The offset of this function in the executable (8 bytes). - buf.Write(u64.LeBytes(uint64(offset))) - } - // The length of code segment (8 bytes). - buf.Write(u64.LeBytes(uint64(len(cm.executable)))) - // Append the native code. - buf.Write(cm.executable) - // Append checksum. - checksum := crc32.Checksum(cm.executable, crc) - buf.Write(u32.LeBytes(checksum)) - if sm := cm.sourceMap; len(sm.executableOffsets) > 0 { - buf.WriteByte(1) // indicates that source map is present. - l := len(sm.wasmBinaryOffsets) - buf.Write(u64.LeBytes(uint64(l))) - executableAddr := uintptr(unsafe.Pointer(&cm.executable[0])) - for i := 0; i < l; i++ { - buf.Write(u64.LeBytes(sm.wasmBinaryOffsets[i])) - // executableOffsets is absolute address, so we need to subtract executableAddr. - buf.Write(u64.LeBytes(uint64(sm.executableOffsets[i] - executableAddr))) - } - } else { - buf.WriteByte(0) // indicates that source map is not present. - } - return bytes.NewReader(buf.Bytes()) -} - -func deserializeCompiledModule(wazeroVersion string, reader io.ReadCloser) (cm *compiledModule, staleCache bool, err error) { - defer reader.Close() - cacheHeaderSize := len(magic) + 1 /* version size */ + len(wazeroVersion) + 4 /* number of functions */ - - // Read the header before the native code. - header := make([]byte, cacheHeaderSize) - n, err := reader.Read(header) - if err != nil { - return nil, false, fmt.Errorf("compilationcache: error reading header: %v", err) - } - - if n != cacheHeaderSize { - return nil, false, fmt.Errorf("compilationcache: invalid header length: %d", n) - } - - if !bytes.Equal(header[:len(magic)], magic) { - return nil, false, fmt.Errorf( - "compilationcache: invalid magic number: got %s but want %s", magic, header[:len(magic)]) - } - - // Check the version compatibility. - versionSize := int(header[len(magic)]) - - cachedVersionBegin, cachedVersionEnd := len(magic)+1, len(magic)+1+versionSize - if cachedVersionEnd >= len(header) { - staleCache = true - return - } else if cachedVersion := string(header[cachedVersionBegin:cachedVersionEnd]); cachedVersion != wazeroVersion { - staleCache = true - return - } - - functionsNum := binary.LittleEndian.Uint32(header[len(header)-4:]) - cm = &compiledModule{functionOffsets: make([]int, functionsNum), executables: &executables{}} - - var eightBytes [8]byte - for i := uint32(0); i < functionsNum; i++ { - // Read the offset of each function in the executable. 
- var offset uint64 - if offset, err = readUint64(reader, &eightBytes); err != nil { - err = fmt.Errorf("compilationcache: error reading func[%d] executable offset: %v", i, err) - return - } - cm.functionOffsets[i] = int(offset) - } - - executableLen, err := readUint64(reader, &eightBytes) - if err != nil { - err = fmt.Errorf("compilationcache: error reading executable size: %v", err) - return - } - - if executableLen > 0 { - executable, err := platform.MmapCodeSegment(int(executableLen)) - if err != nil { - err = fmt.Errorf("compilationcache: error mmapping executable (len=%d): %v", executableLen, err) - return nil, false, err - } - - _, err = io.ReadFull(reader, executable) - if err != nil { - err = fmt.Errorf("compilationcache: error reading executable (len=%d): %v", executableLen, err) - return nil, false, err - } - - expected := crc32.Checksum(executable, crc) - if _, err = io.ReadFull(reader, eightBytes[:4]); err != nil { - return nil, false, fmt.Errorf("compilationcache: could not read checksum: %v", err) - } else if checksum := binary.LittleEndian.Uint32(eightBytes[:4]); expected != checksum { - return nil, false, fmt.Errorf("compilationcache: checksum mismatch (expected %d, got %d)", expected, checksum) - } - - if runtime.GOARCH == "arm64" { - // On arm64, we cannot give all of rwx at the same time, so we change it to exec. - if err = platform.MprotectRX(executable); err != nil { - return nil, false, err - } - } - cm.executable = executable - } - - if _, err := io.ReadFull(reader, eightBytes[:1]); err != nil { - return nil, false, fmt.Errorf("compilationcache: error reading source map presence: %v", err) - } - - if eightBytes[0] == 1 { - sm := &cm.sourceMap - sourceMapLen, err := readUint64(reader, &eightBytes) - if err != nil { - err = fmt.Errorf("compilationcache: error reading source map length: %v", err) - return nil, false, err - } - executableOffset := uintptr(unsafe.Pointer(&cm.executable[0])) - for i := uint64(0); i < sourceMapLen; i++ { - wasmBinaryOffset, err := readUint64(reader, &eightBytes) - if err != nil { - err = fmt.Errorf("compilationcache: error reading source map[%d] wasm binary offset: %v", i, err) - return nil, false, err - } - executableRelativeOffset, err := readUint64(reader, &eightBytes) - if err != nil { - err = fmt.Errorf("compilationcache: error reading source map[%d] executable offset: %v", i, err) - return nil, false, err - } - sm.wasmBinaryOffsets = append(sm.wasmBinaryOffsets, wasmBinaryOffset) - // executableOffsets is absolute address, so we need to add executableOffset. - sm.executableOffsets = append(sm.executableOffsets, uintptr(executableRelativeOffset)+executableOffset) - } - } - return -} - -// readUint64 strictly reads an uint64 in little-endian byte order, using the -// given array as a buffer. This returns io.EOF if less than 8 bytes were read. -func readUint64(reader io.Reader, b *[8]byte) (uint64, error) { - s := b[0:8] - n, err := reader.Read(s) - if err != nil { - return 0, err - } else if n < 8 { // more strict than reader.Read - return 0, io.EOF - } - - // Read the u64 from the underlying buffer. 
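For comparison, the strict read above could also be phrased with io.ReadFull; this is only a sketch, and it differs in that io.ReadFull retries partial reads and reports a truncated stream as io.ErrUnexpectedEOF rather than the io.EOF documented on readUint64:

	if _, err := io.ReadFull(reader, b[:]); err != nil {
		return 0, err
	}
	return binary.LittleEndian.Uint64(b[:]), nil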
- ret := binary.LittleEndian.Uint64(s) - return ret, nil -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_amd64.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_amd64.go deleted file mode 100644 index 18f60af3a..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_amd64.go +++ /dev/null @@ -1,15 +0,0 @@ -//go:build amd64 && !tinygo - -package wazevo - -import _ "unsafe" - -// entrypoint is implemented by the backend. -// -//go:linkname entrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64.entrypoint -func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultStackPtr *uint64, goAllocatedStackSlicePtr uintptr) - -// entrypoint is implemented by the backend. -// -//go:linkname afterGoFunctionCallEntrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64.afterGoFunctionCallEntrypoint -func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_arm64.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_arm64.go deleted file mode 100644 index e16d64f65..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_arm64.go +++ /dev/null @@ -1,15 +0,0 @@ -//go:build arm64 && !tinygo - -package wazevo - -import _ "unsafe" - -// entrypoint is implemented by the backend. -// -//go:linkname entrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64.entrypoint -func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultStackPtr *uint64, goAllocatedStackSlicePtr uintptr) - -// entrypoint is implemented by the backend. -// -//go:linkname afterGoFunctionCallEntrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64.afterGoFunctionCallEntrypoint -func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_others.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_others.go deleted file mode 100644 index 8f9d64b2b..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_others.go +++ /dev/null @@ -1,15 +0,0 @@ -//go:build (!arm64 && !amd64) || tinygo - -package wazevo - -import ( - "runtime" -) - -func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultStackPtr *uint64, goAllocatedStackSlicePtr uintptr) { - panic(runtime.GOARCH) -} - -func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr) { - panic(runtime.GOARCH) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go deleted file mode 100644 index eebdba034..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go +++ /dev/null @@ -1,567 +0,0 @@ -// Package frontend implements the translation of WebAssembly to SSA IR using the ssa package. 
-package frontend - -import ( - "bytes" - "math" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" - "github.com/tetratelabs/wazero/internal/wasm" -) - -// Compiler is in charge of lowering Wasm to SSA IR, and does the optimization -// on top of it in architecture-independent way. -type Compiler struct { - // Per-module data that is used across all functions. - - m *wasm.Module - offset *wazevoapi.ModuleContextOffsetData - // ssaBuilder is a ssa.Builder used by this frontend. - ssaBuilder ssa.Builder - signatures map[*wasm.FunctionType]*ssa.Signature - listenerSignatures map[*wasm.FunctionType][2]*ssa.Signature - memoryGrowSig ssa.Signature - memoryWait32Sig ssa.Signature - memoryWait64Sig ssa.Signature - memoryNotifySig ssa.Signature - checkModuleExitCodeSig ssa.Signature - tableGrowSig ssa.Signature - refFuncSig ssa.Signature - memmoveSig ssa.Signature - ensureTermination bool - - // Followings are reset by per function. - - // wasmLocalToVariable maps the index (considered as wasm.Index of locals) - // to the corresponding ssa.Variable. - wasmLocalToVariable [] /* local index to */ ssa.Variable - wasmLocalFunctionIndex wasm.Index - wasmFunctionTypeIndex wasm.Index - wasmFunctionTyp *wasm.FunctionType - wasmFunctionLocalTypes []wasm.ValueType - wasmFunctionBody []byte - wasmFunctionBodyOffsetInCodeSection uint64 - memoryBaseVariable, memoryLenVariable ssa.Variable - needMemory bool - memoryShared bool - globalVariables []ssa.Variable - globalVariablesTypes []ssa.Type - mutableGlobalVariablesIndexes []wasm.Index // index to ^. - needListener bool - needSourceOffsetInfo bool - // br is reused during lowering. - br *bytes.Reader - loweringState loweringState - - knownSafeBounds [] /* ssa.ValueID to */ knownSafeBound - knownSafeBoundsSet []ssa.ValueID - - knownSafeBoundsAtTheEndOfBlocks [] /* ssa.BlockID to */ knownSafeBoundsAtTheEndOfBlock - varLengthKnownSafeBoundWithIDPool wazevoapi.VarLengthPool[knownSafeBoundWithID] - - execCtxPtrValue, moduleCtxPtrValue ssa.Value - - // Following are reused for the known safe bounds analysis. - - pointers []int - bounds [][]knownSafeBoundWithID -} - -type ( - // knownSafeBound represents a known safe bound for a value. - knownSafeBound struct { - // bound is a constant upper bound for the value. - bound uint64 - // absoluteAddr is the absolute address of the value. - absoluteAddr ssa.Value - } - // knownSafeBoundWithID is a knownSafeBound with the ID of the value. - knownSafeBoundWithID struct { - knownSafeBound - id ssa.ValueID - } - knownSafeBoundsAtTheEndOfBlock = wazevoapi.VarLength[knownSafeBoundWithID] -) - -var knownSafeBoundsAtTheEndOfBlockNil = wazevoapi.NewNilVarLength[knownSafeBoundWithID]() - -// NewFrontendCompiler returns a frontend Compiler. 
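The returned Compiler is reused for every function in the module: the per-module state set up here stays put, while the per-function fields are reset by Init and then consumed by LowerToSSA, after which the lowered SSA can be read back from the ssa.Builder supplied to this constructor.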
-func NewFrontendCompiler(m *wasm.Module, ssaBuilder ssa.Builder, offset *wazevoapi.ModuleContextOffsetData, ensureTermination bool, listenerOn bool, sourceInfo bool) *Compiler { - c := &Compiler{ - m: m, - ssaBuilder: ssaBuilder, - br: bytes.NewReader(nil), - offset: offset, - ensureTermination: ensureTermination, - needSourceOffsetInfo: sourceInfo, - varLengthKnownSafeBoundWithIDPool: wazevoapi.NewVarLengthPool[knownSafeBoundWithID](), - } - c.declareSignatures(listenerOn) - return c -} - -func (c *Compiler) declareSignatures(listenerOn bool) { - m := c.m - c.signatures = make(map[*wasm.FunctionType]*ssa.Signature, len(m.TypeSection)+2) - if listenerOn { - c.listenerSignatures = make(map[*wasm.FunctionType][2]*ssa.Signature, len(m.TypeSection)) - } - for i := range m.TypeSection { - wasmSig := &m.TypeSection[i] - sig := SignatureForWasmFunctionType(wasmSig) - sig.ID = ssa.SignatureID(i) - c.signatures[wasmSig] = &sig - c.ssaBuilder.DeclareSignature(&sig) - - if listenerOn { - beforeSig, afterSig := SignatureForListener(wasmSig) - beforeSig.ID = ssa.SignatureID(i) + ssa.SignatureID(len(m.TypeSection)) - afterSig.ID = ssa.SignatureID(i) + ssa.SignatureID(len(m.TypeSection))*2 - c.listenerSignatures[wasmSig] = [2]*ssa.Signature{beforeSig, afterSig} - c.ssaBuilder.DeclareSignature(beforeSig) - c.ssaBuilder.DeclareSignature(afterSig) - } - } - - begin := ssa.SignatureID(len(m.TypeSection)) - if listenerOn { - begin *= 3 - } - c.memoryGrowSig = ssa.Signature{ - ID: begin, - // Takes execution context and the page size to grow. - Params: []ssa.Type{ssa.TypeI64, ssa.TypeI32}, - // Returns the previous page size. - Results: []ssa.Type{ssa.TypeI32}, - } - c.ssaBuilder.DeclareSignature(&c.memoryGrowSig) - - c.checkModuleExitCodeSig = ssa.Signature{ - ID: c.memoryGrowSig.ID + 1, - // Only takes execution context. - Params: []ssa.Type{ssa.TypeI64}, - } - c.ssaBuilder.DeclareSignature(&c.checkModuleExitCodeSig) - - c.tableGrowSig = ssa.Signature{ - ID: c.checkModuleExitCodeSig.ID + 1, - Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* table index */, ssa.TypeI32 /* num */, ssa.TypeI64 /* ref */}, - // Returns the previous size. - Results: []ssa.Type{ssa.TypeI32}, - } - c.ssaBuilder.DeclareSignature(&c.tableGrowSig) - - c.refFuncSig = ssa.Signature{ - ID: c.tableGrowSig.ID + 1, - Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* func index */}, - // Returns the function reference. - Results: []ssa.Type{ssa.TypeI64}, - } - c.ssaBuilder.DeclareSignature(&c.refFuncSig) - - c.memmoveSig = ssa.Signature{ - ID: c.refFuncSig.ID + 1, - // dst, src, and the byte count. - Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64}, - } - - c.ssaBuilder.DeclareSignature(&c.memmoveSig) - - c.memoryWait32Sig = ssa.Signature{ - ID: c.memmoveSig.ID + 1, - // exec context, timeout, expected, addr - Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI32, ssa.TypeI64}, - // Returns the status. - Results: []ssa.Type{ssa.TypeI32}, - } - c.ssaBuilder.DeclareSignature(&c.memoryWait32Sig) - - c.memoryWait64Sig = ssa.Signature{ - ID: c.memoryWait32Sig.ID + 1, - // exec context, timeout, expected, addr - Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64, ssa.TypeI64}, - // Returns the status. - Results: []ssa.Type{ssa.TypeI32}, - } - c.ssaBuilder.DeclareSignature(&c.memoryWait64Sig) - - c.memoryNotifySig = ssa.Signature{ - ID: c.memoryWait64Sig.ID + 1, - // exec context, count, addr - Params: []ssa.Type{ssa.TypeI64, ssa.TypeI32, ssa.TypeI64}, - // Returns the number notified. 
- Results: []ssa.Type{ssa.TypeI32}, - } - c.ssaBuilder.DeclareSignature(&c.memoryNotifySig) -} - -// SignatureForWasmFunctionType returns the ssa.Signature for the given wasm.FunctionType. -func SignatureForWasmFunctionType(typ *wasm.FunctionType) ssa.Signature { - sig := ssa.Signature{ - // +2 to pass moduleContextPtr and executionContextPtr. See the inline comment LowerToSSA. - Params: make([]ssa.Type, len(typ.Params)+2), - Results: make([]ssa.Type, len(typ.Results)), - } - sig.Params[0] = executionContextPtrTyp - sig.Params[1] = moduleContextPtrTyp - for j, typ := range typ.Params { - sig.Params[j+2] = WasmTypeToSSAType(typ) - } - for j, typ := range typ.Results { - sig.Results[j] = WasmTypeToSSAType(typ) - } - return sig -} - -// Init initializes the state of frontendCompiler and make it ready for a next function. -func (c *Compiler) Init(idx, typIndex wasm.Index, typ *wasm.FunctionType, localTypes []wasm.ValueType, body []byte, needListener bool, bodyOffsetInCodeSection uint64) { - c.ssaBuilder.Init(c.signatures[typ]) - c.loweringState.reset() - - c.wasmFunctionTypeIndex = typIndex - c.wasmLocalFunctionIndex = idx - c.wasmFunctionTyp = typ - c.wasmFunctionLocalTypes = localTypes - c.wasmFunctionBody = body - c.wasmFunctionBodyOffsetInCodeSection = bodyOffsetInCodeSection - c.needListener = needListener - c.clearSafeBounds() - c.varLengthKnownSafeBoundWithIDPool.Reset() - c.knownSafeBoundsAtTheEndOfBlocks = c.knownSafeBoundsAtTheEndOfBlocks[:0] -} - -// Note: this assumes 64-bit platform (I believe we won't have 32-bit backend ;)). -const executionContextPtrTyp, moduleContextPtrTyp = ssa.TypeI64, ssa.TypeI64 - -// LowerToSSA lowers the current function to SSA function which will be held by ssaBuilder. -// After calling this, the caller will be able to access the SSA info in *Compiler.ssaBuilder. -// -// Note that this only does the naive lowering, and do not do any optimization, instead the caller is expected to do so. -func (c *Compiler) LowerToSSA() { - builder := c.ssaBuilder - - // Set up the entry block. - entryBlock := builder.AllocateBasicBlock() - builder.SetCurrentBlock(entryBlock) - - // Functions always take two parameters in addition to Wasm-level parameters: - // - // 1. executionContextPtr: pointer to the *executionContext in wazevo package. - // This will be used to exit the execution in the face of trap, plus used for host function calls. - // - // 2. moduleContextPtr: pointer to the *moduleContextOpaque in wazevo package. - // This will be used to access memory, etc. Also, this will be used during host function calls. - // - // Note: it's clear that sometimes a function won't need them. For example, - // if the function doesn't trap and doesn't make function call, then - // we might be able to eliminate the parameter. However, if that function - // can be called via call_indirect, then we cannot eliminate because the - // signature won't match with the expected one. - // TODO: maybe there's some way to do this optimization without glitches, but so far I have no clue about the feasibility. - // - // Note: In Wasmtime or many other runtimes, moduleContextPtr is called "vmContext". Also note that `moduleContextPtr` - // is wazero-specific since other runtimes can naturally use the OS-level signal to do this job thanks to the fact that - // they can use native stack vs wazero cannot use Go-routine stack and have to use Go-runtime allocated []byte as a stack. 
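(Concretely: a Wasm function of type (i32, f64) -> i32 is lowered with the SSA signature (i64 exec_ctx, i64 module_ctx, i32, f64) -> i32, matching SignatureForWasmFunctionType above.)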
- c.execCtxPtrValue = entryBlock.AddParam(builder, executionContextPtrTyp) - c.moduleCtxPtrValue = entryBlock.AddParam(builder, moduleContextPtrTyp) - builder.AnnotateValue(c.execCtxPtrValue, "exec_ctx") - builder.AnnotateValue(c.moduleCtxPtrValue, "module_ctx") - - for i, typ := range c.wasmFunctionTyp.Params { - st := WasmTypeToSSAType(typ) - variable := builder.DeclareVariable(st) - value := entryBlock.AddParam(builder, st) - builder.DefineVariable(variable, value, entryBlock) - c.setWasmLocalVariable(wasm.Index(i), variable) - } - c.declareWasmLocals() - c.declareNecessaryVariables() - - c.lowerBody(entryBlock) -} - -// localVariable returns the SSA variable for the given Wasm local index. -func (c *Compiler) localVariable(index wasm.Index) ssa.Variable { - return c.wasmLocalToVariable[index] -} - -func (c *Compiler) setWasmLocalVariable(index wasm.Index, variable ssa.Variable) { - idx := int(index) - if idx >= len(c.wasmLocalToVariable) { - c.wasmLocalToVariable = append(c.wasmLocalToVariable, make([]ssa.Variable, idx+1-len(c.wasmLocalToVariable))...) - } - c.wasmLocalToVariable[idx] = variable -} - -// declareWasmLocals declares the SSA variables for the Wasm locals. -func (c *Compiler) declareWasmLocals() { - localCount := wasm.Index(len(c.wasmFunctionTyp.Params)) - for i, typ := range c.wasmFunctionLocalTypes { - st := WasmTypeToSSAType(typ) - variable := c.ssaBuilder.DeclareVariable(st) - c.setWasmLocalVariable(wasm.Index(i)+localCount, variable) - c.ssaBuilder.InsertZeroValue(st) - } -} - -func (c *Compiler) declareNecessaryVariables() { - if c.needMemory = c.m.MemorySection != nil; c.needMemory { - c.memoryShared = c.m.MemorySection.IsShared - } else if c.needMemory = c.m.ImportMemoryCount > 0; c.needMemory { - for _, imp := range c.m.ImportSection { - if imp.Type == wasm.ExternTypeMemory { - c.memoryShared = imp.DescMem.IsShared - break - } - } - } - - if c.needMemory { - c.memoryBaseVariable = c.ssaBuilder.DeclareVariable(ssa.TypeI64) - c.memoryLenVariable = c.ssaBuilder.DeclareVariable(ssa.TypeI64) - } - - c.globalVariables = c.globalVariables[:0] - c.mutableGlobalVariablesIndexes = c.mutableGlobalVariablesIndexes[:0] - c.globalVariablesTypes = c.globalVariablesTypes[:0] - for _, imp := range c.m.ImportSection { - if imp.Type == wasm.ExternTypeGlobal { - desc := imp.DescGlobal - c.declareWasmGlobal(desc.ValType, desc.Mutable) - } - } - for _, g := range c.m.GlobalSection { - desc := g.Type - c.declareWasmGlobal(desc.ValType, desc.Mutable) - } - - // TODO: add tables. -} - -func (c *Compiler) declareWasmGlobal(typ wasm.ValueType, mutable bool) { - var st ssa.Type - switch typ { - case wasm.ValueTypeI32: - st = ssa.TypeI32 - case wasm.ValueTypeI64, - // Both externref and funcref are represented as I64 since we only support 64-bit platforms. - wasm.ValueTypeExternref, wasm.ValueTypeFuncref: - st = ssa.TypeI64 - case wasm.ValueTypeF32: - st = ssa.TypeF32 - case wasm.ValueTypeF64: - st = ssa.TypeF64 - case wasm.ValueTypeV128: - st = ssa.TypeV128 - default: - panic("TODO: " + wasm.ValueTypeName(typ)) - } - v := c.ssaBuilder.DeclareVariable(st) - index := wasm.Index(len(c.globalVariables)) - c.globalVariables = append(c.globalVariables, v) - c.globalVariablesTypes = append(c.globalVariablesTypes, st) - if mutable { - c.mutableGlobalVariablesIndexes = append(c.mutableGlobalVariablesIndexes, index) - } -} - -// WasmTypeToSSAType converts wasm.ValueType to ssa.Type. 
-func WasmTypeToSSAType(vt wasm.ValueType) ssa.Type { - switch vt { - case wasm.ValueTypeI32: - return ssa.TypeI32 - case wasm.ValueTypeI64, - // Both externref and funcref are represented as I64 since we only support 64-bit platforms. - wasm.ValueTypeExternref, wasm.ValueTypeFuncref: - return ssa.TypeI64 - case wasm.ValueTypeF32: - return ssa.TypeF32 - case wasm.ValueTypeF64: - return ssa.TypeF64 - case wasm.ValueTypeV128: - return ssa.TypeV128 - default: - panic("TODO: " + wasm.ValueTypeName(vt)) - } -} - -// addBlockParamsFromWasmTypes adds the block parameters to the given block. -func (c *Compiler) addBlockParamsFromWasmTypes(tps []wasm.ValueType, blk ssa.BasicBlock) { - for _, typ := range tps { - st := WasmTypeToSSAType(typ) - blk.AddParam(c.ssaBuilder, st) - } -} - -// formatBuilder outputs the constructed SSA function as a string with a source information. -func (c *Compiler) formatBuilder() string { - return c.ssaBuilder.Format() -} - -// SignatureForListener returns the signatures for the listener functions. -func SignatureForListener(wasmSig *wasm.FunctionType) (*ssa.Signature, *ssa.Signature) { - beforeSig := &ssa.Signature{} - beforeSig.Params = make([]ssa.Type, len(wasmSig.Params)+2) - beforeSig.Params[0] = ssa.TypeI64 // Execution context. - beforeSig.Params[1] = ssa.TypeI32 // Function index. - for i, p := range wasmSig.Params { - beforeSig.Params[i+2] = WasmTypeToSSAType(p) - } - afterSig := &ssa.Signature{} - afterSig.Params = make([]ssa.Type, len(wasmSig.Results)+2) - afterSig.Params[0] = ssa.TypeI64 // Execution context. - afterSig.Params[1] = ssa.TypeI32 // Function index. - for i, p := range wasmSig.Results { - afterSig.Params[i+2] = WasmTypeToSSAType(p) - } - return beforeSig, afterSig -} - -// isBoundSafe returns true if the given value is known to be safe to access up to the given bound. -func (c *Compiler) getKnownSafeBound(v ssa.ValueID) *knownSafeBound { - if int(v) >= len(c.knownSafeBounds) { - return nil - } - return &c.knownSafeBounds[v] -} - -// recordKnownSafeBound records the given safe bound for the given value. -func (c *Compiler) recordKnownSafeBound(v ssa.ValueID, safeBound uint64, absoluteAddr ssa.Value) { - if int(v) >= len(c.knownSafeBounds) { - c.knownSafeBounds = append(c.knownSafeBounds, make([]knownSafeBound, v+1)...) - } - - if exiting := c.knownSafeBounds[v]; exiting.bound == 0 { - c.knownSafeBounds[v] = knownSafeBound{ - bound: safeBound, - absoluteAddr: absoluteAddr, - } - c.knownSafeBoundsSet = append(c.knownSafeBoundsSet, v) - } else if safeBound > exiting.bound { - c.knownSafeBounds[v].bound = safeBound - } -} - -// clearSafeBounds clears the known safe bounds. -func (c *Compiler) clearSafeBounds() { - for _, v := range c.knownSafeBoundsSet { - ptr := &c.knownSafeBounds[v] - ptr.bound = 0 - ptr.absoluteAddr = ssa.ValueInvalid - } - c.knownSafeBoundsSet = c.knownSafeBoundsSet[:0] -} - -// resetAbsoluteAddressInSafeBounds resets the absolute addresses recorded in the known safe bounds. -func (c *Compiler) resetAbsoluteAddressInSafeBounds() { - for _, v := range c.knownSafeBoundsSet { - ptr := &c.knownSafeBounds[v] - ptr.absoluteAddr = ssa.ValueInvalid - } -} - -func (k *knownSafeBound) valid() bool { - return k != nil && k.bound > 0 -} - -func (c *Compiler) allocateVarLengthValues(_cap int, vs ...ssa.Value) ssa.Values { - builder := c.ssaBuilder - pool := builder.VarLengthPool() - args := pool.Allocate(_cap) - args = args.Append(builder.VarLengthPool(), vs...) 
- return args -} - -func (c *Compiler) finalizeKnownSafeBoundsAtTheEndOfBlock(bID ssa.BasicBlockID) { - _bID := int(bID) - if l := len(c.knownSafeBoundsAtTheEndOfBlocks); _bID >= l { - c.knownSafeBoundsAtTheEndOfBlocks = append(c.knownSafeBoundsAtTheEndOfBlocks, - make([]knownSafeBoundsAtTheEndOfBlock, _bID+1-len(c.knownSafeBoundsAtTheEndOfBlocks))...) - for i := l; i < len(c.knownSafeBoundsAtTheEndOfBlocks); i++ { - c.knownSafeBoundsAtTheEndOfBlocks[i] = knownSafeBoundsAtTheEndOfBlockNil - } - } - p := &c.varLengthKnownSafeBoundWithIDPool - size := len(c.knownSafeBoundsSet) - allocated := c.varLengthKnownSafeBoundWithIDPool.Allocate(size) - // Sort the known safe bounds by the value ID so that we can use the intersection algorithm in initializeCurrentBlockKnownBounds. - sortSSAValueIDs(c.knownSafeBoundsSet) - for _, vID := range c.knownSafeBoundsSet { - kb := c.knownSafeBounds[vID] - allocated = allocated.Append(p, knownSafeBoundWithID{ - knownSafeBound: kb, - id: vID, - }) - } - c.knownSafeBoundsAtTheEndOfBlocks[bID] = allocated - c.clearSafeBounds() -} - -func (c *Compiler) initializeCurrentBlockKnownBounds() { - currentBlk := c.ssaBuilder.CurrentBlock() - switch preds := currentBlk.Preds(); preds { - case 0: - case 1: - pred := currentBlk.Pred(0).ID() - for _, kb := range c.getKnownSafeBoundsAtTheEndOfBlocks(pred).View() { - // Unless the block is sealed, we cannot assume the absolute address is valid: - // later we might add another predecessor that has no visibility of that value. - addr := ssa.ValueInvalid - if currentBlk.Sealed() { - addr = kb.absoluteAddr - } - c.recordKnownSafeBound(kb.id, kb.bound, addr) - } - default: - c.pointers = c.pointers[:0] - c.bounds = c.bounds[:0] - for i := 0; i < preds; i++ { - c.bounds = append(c.bounds, c.getKnownSafeBoundsAtTheEndOfBlocks(currentBlk.Pred(i).ID()).View()) - c.pointers = append(c.pointers, 0) - } - - // If there are multiple predecessors, we need to find the intersection of the known safe bounds. - - outer: - for { - smallestID := ssa.ValueID(math.MaxUint32) - for i, ptr := range c.pointers { - if ptr >= len(c.bounds[i]) { - break outer - } - cb := &c.bounds[i][ptr] - if id := cb.id; id < smallestID { - smallestID = cb.id - } - } - - // Check if current elements are the same across all lists. - same := true - minBound := uint64(math.MaxUint64) - for i := 0; i < preds; i++ { - cb := &c.bounds[i][c.pointers[i]] - if cb.id != smallestID { - same = false - } else { - if cb.bound < minBound { - minBound = cb.bound - } - c.pointers[i]++ - } - } - - if same { // All elements are the same. - // Absolute address cannot be used in the intersection since the value might be only defined in one of the predecessors. 
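(Viewed as a whole, this loop is a k-way intersection over the predecessors' ID-sorted bound lists: each iteration advances the pointers whose head matches the smallest outstanding ValueID, and only an ID found at the head of every list is recorded, with the minimum of its bounds, so the recorded bound holds along every incoming edge.)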
- c.recordKnownSafeBound(smallestID, minBound, ssa.ValueInvalid) - } - } - } -} - -func (c *Compiler) getKnownSafeBoundsAtTheEndOfBlocks(id ssa.BasicBlockID) knownSafeBoundsAtTheEndOfBlock { - if int(id) >= len(c.knownSafeBoundsAtTheEndOfBlocks) { - return knownSafeBoundsAtTheEndOfBlockNil - } - return c.knownSafeBoundsAtTheEndOfBlocks[id] -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go deleted file mode 100644 index e73debbd1..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go +++ /dev/null @@ -1,4261 +0,0 @@ -package frontend - -import ( - "encoding/binary" - "fmt" - "math" - "runtime" - "strings" - - "github.com/tetratelabs/wazero/api" - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" - "github.com/tetratelabs/wazero/internal/leb128" - "github.com/tetratelabs/wazero/internal/wasm" -) - -type ( - // loweringState is used to keep the state of lowering. - loweringState struct { - // values holds the values on the Wasm stack. - values []ssa.Value - controlFrames []controlFrame - unreachable bool - unreachableDepth int - tmpForBrTable []uint32 - pc int - } - controlFrame struct { - kind controlFrameKind - // originalStackLen holds the number of values on the Wasm stack - // when start executing this control frame minus params for the block. - originalStackLenWithoutParam int - // blk is the loop header if this is loop, and is the else-block if this is an if frame. - blk, - // followingBlock is the basic block we enter if we reach "end" of block. - followingBlock ssa.BasicBlock - blockType *wasm.FunctionType - // clonedArgs hold the arguments to Else block. - clonedArgs ssa.Values - } - - controlFrameKind byte -) - -// String implements fmt.Stringer for debugging. -func (l *loweringState) String() string { - var str []string - for _, v := range l.values { - str = append(str, fmt.Sprintf("v%v", v.ID())) - } - var frames []string - for i := range l.controlFrames { - frames = append(frames, l.controlFrames[i].kind.String()) - } - return fmt.Sprintf("\n\tunreachable=%v(depth=%d)\n\tstack: %s\n\tcontrol frames: %s", - l.unreachable, l.unreachableDepth, - strings.Join(str, ", "), - strings.Join(frames, ", "), - ) -} - -const ( - controlFrameKindFunction = iota + 1 - controlFrameKindLoop - controlFrameKindIfWithElse - controlFrameKindIfWithoutElse - controlFrameKindBlock -) - -// String implements fmt.Stringer for debugging. -func (k controlFrameKind) String() string { - switch k { - case controlFrameKindFunction: - return "function" - case controlFrameKindLoop: - return "loop" - case controlFrameKindIfWithElse: - return "if_with_else" - case controlFrameKindIfWithoutElse: - return "if_without_else" - case controlFrameKindBlock: - return "block" - default: - panic(k) - } -} - -// isLoop returns true if this is a loop frame. -func (ctrl *controlFrame) isLoop() bool { - return ctrl.kind == controlFrameKindLoop -} - -// reset resets the state of loweringState for reuse. 
-func (l *loweringState) reset() { - l.values = l.values[:0] - l.controlFrames = l.controlFrames[:0] - l.pc = 0 - l.unreachable = false - l.unreachableDepth = 0 -} - -func (l *loweringState) peek() (ret ssa.Value) { - tail := len(l.values) - 1 - return l.values[tail] -} - -func (l *loweringState) pop() (ret ssa.Value) { - tail := len(l.values) - 1 - ret = l.values[tail] - l.values = l.values[:tail] - return -} - -func (l *loweringState) push(ret ssa.Value) { - l.values = append(l.values, ret) -} - -func (c *Compiler) nPeekDup(n int) ssa.Values { - if n == 0 { - return ssa.ValuesNil - } - - l := c.state() - tail := len(l.values) - - args := c.allocateVarLengthValues(n) - args = args.Append(c.ssaBuilder.VarLengthPool(), l.values[tail-n:tail]...) - return args -} - -func (l *loweringState) ctrlPop() (ret controlFrame) { - tail := len(l.controlFrames) - 1 - ret = l.controlFrames[tail] - l.controlFrames = l.controlFrames[:tail] - return -} - -func (l *loweringState) ctrlPush(ret controlFrame) { - l.controlFrames = append(l.controlFrames, ret) -} - -func (l *loweringState) ctrlPeekAt(n int) (ret *controlFrame) { - tail := len(l.controlFrames) - 1 - return &l.controlFrames[tail-n] -} - -// lowerBody lowers the body of the Wasm function to the SSA form. -func (c *Compiler) lowerBody(entryBlk ssa.BasicBlock) { - c.ssaBuilder.Seal(entryBlk) - - if c.needListener { - c.callListenerBefore() - } - - // Pushes the empty control frame which corresponds to the function return. - c.loweringState.ctrlPush(controlFrame{ - kind: controlFrameKindFunction, - blockType: c.wasmFunctionTyp, - followingBlock: c.ssaBuilder.ReturnBlock(), - }) - - for c.loweringState.pc < len(c.wasmFunctionBody) { - blkBeforeLowering := c.ssaBuilder.CurrentBlock() - c.lowerCurrentOpcode() - blkAfterLowering := c.ssaBuilder.CurrentBlock() - if blkBeforeLowering != blkAfterLowering { - // In Wasm, once a block exits, that means we've done compiling the block. - // Therefore, we finalize the known bounds at the end of the block for the exiting block. - c.finalizeKnownSafeBoundsAtTheEndOfBlock(blkBeforeLowering.ID()) - // After that, we initialize the known bounds for the new compilation target block. - c.initializeCurrentBlockKnownBounds() - } - } -} - -func (c *Compiler) state() *loweringState { - return &c.loweringState -} - -func (c *Compiler) lowerCurrentOpcode() { - op := c.wasmFunctionBody[c.loweringState.pc] - - if c.needSourceOffsetInfo { - c.ssaBuilder.SetCurrentSourceOffset( - ssa.SourceOffset(c.loweringState.pc) + ssa.SourceOffset(c.wasmFunctionBodyOffsetInCodeSection), - ) - } - - builder := c.ssaBuilder - state := c.state() - switch op { - case wasm.OpcodeI32Const: - c := c.readI32s() - if state.unreachable { - break - } - - iconst := builder.AllocateInstruction().AsIconst32(uint32(c)).Insert(builder) - value := iconst.Return() - state.push(value) - case wasm.OpcodeI64Const: - c := c.readI64s() - if state.unreachable { - break - } - iconst := builder.AllocateInstruction().AsIconst64(uint64(c)).Insert(builder) - value := iconst.Return() - state.push(value) - case wasm.OpcodeF32Const: - f32 := c.readF32() - if state.unreachable { - break - } - f32const := builder.AllocateInstruction(). - AsF32const(f32). - Insert(builder). - Return() - state.push(f32const) - case wasm.OpcodeF64Const: - f64 := c.readF64() - if state.unreachable { - break - } - f64const := builder.AllocateInstruction(). - AsF64const(f64). - Insert(builder). 
- Return() - state.push(f64const) - case wasm.OpcodeI32Add, wasm.OpcodeI64Add: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - iadd := builder.AllocateInstruction() - iadd.AsIadd(x, y) - builder.InsertInstruction(iadd) - value := iadd.Return() - state.push(value) - case wasm.OpcodeI32Sub, wasm.OpcodeI64Sub: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - isub := builder.AllocateInstruction() - isub.AsIsub(x, y) - builder.InsertInstruction(isub) - value := isub.Return() - state.push(value) - case wasm.OpcodeF32Add, wasm.OpcodeF64Add: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - iadd := builder.AllocateInstruction() - iadd.AsFadd(x, y) - builder.InsertInstruction(iadd) - value := iadd.Return() - state.push(value) - case wasm.OpcodeI32Mul, wasm.OpcodeI64Mul: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - imul := builder.AllocateInstruction() - imul.AsImul(x, y) - builder.InsertInstruction(imul) - value := imul.Return() - state.push(value) - case wasm.OpcodeF32Sub, wasm.OpcodeF64Sub: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - isub := builder.AllocateInstruction() - isub.AsFsub(x, y) - builder.InsertInstruction(isub) - value := isub.Return() - state.push(value) - case wasm.OpcodeF32Mul, wasm.OpcodeF64Mul: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - isub := builder.AllocateInstruction() - isub.AsFmul(x, y) - builder.InsertInstruction(isub) - value := isub.Return() - state.push(value) - case wasm.OpcodeF32Div, wasm.OpcodeF64Div: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - isub := builder.AllocateInstruction() - isub.AsFdiv(x, y) - builder.InsertInstruction(isub) - value := isub.Return() - state.push(value) - case wasm.OpcodeF32Max, wasm.OpcodeF64Max: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - isub := builder.AllocateInstruction() - isub.AsFmax(x, y) - builder.InsertInstruction(isub) - value := isub.Return() - state.push(value) - case wasm.OpcodeF32Min, wasm.OpcodeF64Min: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - isub := builder.AllocateInstruction() - isub.AsFmin(x, y) - builder.InsertInstruction(isub) - value := isub.Return() - state.push(value) - case wasm.OpcodeI64Extend8S: - if state.unreachable { - break - } - c.insertIntegerExtend(true, 8, 64) - case wasm.OpcodeI64Extend16S: - if state.unreachable { - break - } - c.insertIntegerExtend(true, 16, 64) - case wasm.OpcodeI64Extend32S, wasm.OpcodeI64ExtendI32S: - if state.unreachable { - break - } - c.insertIntegerExtend(true, 32, 64) - case wasm.OpcodeI64ExtendI32U: - if state.unreachable { - break - } - c.insertIntegerExtend(false, 32, 64) - case wasm.OpcodeI32Extend8S: - if state.unreachable { - break - } - c.insertIntegerExtend(true, 8, 32) - case wasm.OpcodeI32Extend16S: - if state.unreachable { - break - } - c.insertIntegerExtend(true, 16, 32) - case wasm.OpcodeI32Eqz, wasm.OpcodeI64Eqz: - if state.unreachable { - break - } - x := state.pop() - zero := builder.AllocateInstruction() - if op == wasm.OpcodeI32Eqz { - zero.AsIconst32(0) - } else { - zero.AsIconst64(0) - } - builder.InsertInstruction(zero) - icmp := builder.AllocateInstruction(). - AsIcmp(x, zero.Return(), ssa.IntegerCmpCondEqual). - Insert(builder). 
- Return() - state.push(icmp) - case wasm.OpcodeI32Eq, wasm.OpcodeI64Eq: - if state.unreachable { - break - } - c.insertIcmp(ssa.IntegerCmpCondEqual) - case wasm.OpcodeI32Ne, wasm.OpcodeI64Ne: - if state.unreachable { - break - } - c.insertIcmp(ssa.IntegerCmpCondNotEqual) - case wasm.OpcodeI32LtS, wasm.OpcodeI64LtS: - if state.unreachable { - break - } - c.insertIcmp(ssa.IntegerCmpCondSignedLessThan) - case wasm.OpcodeI32LtU, wasm.OpcodeI64LtU: - if state.unreachable { - break - } - c.insertIcmp(ssa.IntegerCmpCondUnsignedLessThan) - case wasm.OpcodeI32GtS, wasm.OpcodeI64GtS: - if state.unreachable { - break - } - c.insertIcmp(ssa.IntegerCmpCondSignedGreaterThan) - case wasm.OpcodeI32GtU, wasm.OpcodeI64GtU: - if state.unreachable { - break - } - c.insertIcmp(ssa.IntegerCmpCondUnsignedGreaterThan) - case wasm.OpcodeI32LeS, wasm.OpcodeI64LeS: - if state.unreachable { - break - } - c.insertIcmp(ssa.IntegerCmpCondSignedLessThanOrEqual) - case wasm.OpcodeI32LeU, wasm.OpcodeI64LeU: - if state.unreachable { - break - } - c.insertIcmp(ssa.IntegerCmpCondUnsignedLessThanOrEqual) - case wasm.OpcodeI32GeS, wasm.OpcodeI64GeS: - if state.unreachable { - break - } - c.insertIcmp(ssa.IntegerCmpCondSignedGreaterThanOrEqual) - case wasm.OpcodeI32GeU, wasm.OpcodeI64GeU: - if state.unreachable { - break - } - c.insertIcmp(ssa.IntegerCmpCondUnsignedGreaterThanOrEqual) - - case wasm.OpcodeF32Eq, wasm.OpcodeF64Eq: - if state.unreachable { - break - } - c.insertFcmp(ssa.FloatCmpCondEqual) - case wasm.OpcodeF32Ne, wasm.OpcodeF64Ne: - if state.unreachable { - break - } - c.insertFcmp(ssa.FloatCmpCondNotEqual) - case wasm.OpcodeF32Lt, wasm.OpcodeF64Lt: - if state.unreachable { - break - } - c.insertFcmp(ssa.FloatCmpCondLessThan) - case wasm.OpcodeF32Gt, wasm.OpcodeF64Gt: - if state.unreachable { - break - } - c.insertFcmp(ssa.FloatCmpCondGreaterThan) - case wasm.OpcodeF32Le, wasm.OpcodeF64Le: - if state.unreachable { - break - } - c.insertFcmp(ssa.FloatCmpCondLessThanOrEqual) - case wasm.OpcodeF32Ge, wasm.OpcodeF64Ge: - if state.unreachable { - break - } - c.insertFcmp(ssa.FloatCmpCondGreaterThanOrEqual) - case wasm.OpcodeF32Neg, wasm.OpcodeF64Neg: - if state.unreachable { - break - } - x := state.pop() - v := builder.AllocateInstruction().AsFneg(x).Insert(builder).Return() - state.push(v) - case wasm.OpcodeF32Sqrt, wasm.OpcodeF64Sqrt: - if state.unreachable { - break - } - x := state.pop() - v := builder.AllocateInstruction().AsSqrt(x).Insert(builder).Return() - state.push(v) - case wasm.OpcodeF32Abs, wasm.OpcodeF64Abs: - if state.unreachable { - break - } - x := state.pop() - v := builder.AllocateInstruction().AsFabs(x).Insert(builder).Return() - state.push(v) - case wasm.OpcodeF32Copysign, wasm.OpcodeF64Copysign: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - v := builder.AllocateInstruction().AsFcopysign(x, y).Insert(builder).Return() - state.push(v) - - case wasm.OpcodeF32Ceil, wasm.OpcodeF64Ceil: - if state.unreachable { - break - } - x := state.pop() - v := builder.AllocateInstruction().AsCeil(x).Insert(builder).Return() - state.push(v) - case wasm.OpcodeF32Floor, wasm.OpcodeF64Floor: - if state.unreachable { - break - } - x := state.pop() - v := builder.AllocateInstruction().AsFloor(x).Insert(builder).Return() - state.push(v) - case wasm.OpcodeF32Trunc, wasm.OpcodeF64Trunc: - if state.unreachable { - break - } - x := state.pop() - v := builder.AllocateInstruction().AsTrunc(x).Insert(builder).Return() - state.push(v) - case wasm.OpcodeF32Nearest, wasm.OpcodeF64Nearest: - if 
state.unreachable { - break - } - x := state.pop() - v := builder.AllocateInstruction().AsNearest(x).Insert(builder).Return() - state.push(v) - case wasm.OpcodeI64TruncF64S, wasm.OpcodeI64TruncF32S, - wasm.OpcodeI32TruncF64S, wasm.OpcodeI32TruncF32S, - wasm.OpcodeI64TruncF64U, wasm.OpcodeI64TruncF32U, - wasm.OpcodeI32TruncF64U, wasm.OpcodeI32TruncF32U: - if state.unreachable { - break - } - ret := builder.AllocateInstruction().AsFcvtToInt( - state.pop(), - c.execCtxPtrValue, - op == wasm.OpcodeI64TruncF64S || op == wasm.OpcodeI64TruncF32S || op == wasm.OpcodeI32TruncF32S || op == wasm.OpcodeI32TruncF64S, - op == wasm.OpcodeI64TruncF64S || op == wasm.OpcodeI64TruncF32S || op == wasm.OpcodeI64TruncF64U || op == wasm.OpcodeI64TruncF32U, - false, - ).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeMiscPrefix: - state.pc++ - // A misc opcode is encoded as an unsigned variable 32-bit integer. - miscOpUint, num, err := leb128.LoadUint32(c.wasmFunctionBody[state.pc:]) - if err != nil { - // In normal conditions this should never happen because the function has passed validation. - panic(fmt.Sprintf("failed to read misc opcode: %v", err)) - } - state.pc += int(num - 1) - miscOp := wasm.OpcodeMisc(miscOpUint) - switch miscOp { - case wasm.OpcodeMiscI64TruncSatF64S, wasm.OpcodeMiscI64TruncSatF32S, - wasm.OpcodeMiscI32TruncSatF64S, wasm.OpcodeMiscI32TruncSatF32S, - wasm.OpcodeMiscI64TruncSatF64U, wasm.OpcodeMiscI64TruncSatF32U, - wasm.OpcodeMiscI32TruncSatF64U, wasm.OpcodeMiscI32TruncSatF32U: - if state.unreachable { - break - } - ret := builder.AllocateInstruction().AsFcvtToInt( - state.pop(), - c.execCtxPtrValue, - miscOp == wasm.OpcodeMiscI64TruncSatF64S || miscOp == wasm.OpcodeMiscI64TruncSatF32S || miscOp == wasm.OpcodeMiscI32TruncSatF32S || miscOp == wasm.OpcodeMiscI32TruncSatF64S, - miscOp == wasm.OpcodeMiscI64TruncSatF64S || miscOp == wasm.OpcodeMiscI64TruncSatF32S || miscOp == wasm.OpcodeMiscI64TruncSatF64U || miscOp == wasm.OpcodeMiscI64TruncSatF32U, - true, - ).Insert(builder).Return() - state.push(ret) - - case wasm.OpcodeMiscTableSize: - tableIndex := c.readI32u() - if state.unreachable { - break - } - - // Load the table. - loadTableInstancePtr := builder.AllocateInstruction() - loadTableInstancePtr.AsLoad(c.moduleCtxPtrValue, c.offset.TableOffset(int(tableIndex)).U32(), ssa.TypeI64) - builder.InsertInstruction(loadTableInstancePtr) - tableInstancePtr := loadTableInstancePtr.Return() - - // Load the table's length. - loadTableLen := builder.AllocateInstruction(). - AsLoad(tableInstancePtr, tableInstanceLenOffset, ssa.TypeI32). - Insert(builder) - state.push(loadTableLen.Return()) - - case wasm.OpcodeMiscTableGrow: - tableIndex := c.readI32u() - if state.unreachable { - break - } - - c.storeCallerModuleContext() - - tableIndexVal := builder.AllocateInstruction().AsIconst32(tableIndex).Insert(builder).Return() - - num := state.pop() - r := state.pop() - - tableGrowPtr := builder.AllocateInstruction(). - AsLoad(c.execCtxPtrValue, - wazevoapi.ExecutionContextOffsetTableGrowTrampolineAddress.U32(), - ssa.TypeI64, - ).Insert(builder).Return() - - args := c.allocateVarLengthValues(4, c.execCtxPtrValue, tableIndexVal, num, r) - callGrowRet := builder. - AllocateInstruction(). - AsCallIndirect(tableGrowPtr, &c.tableGrowSig, args). - Insert(builder).Return() - state.push(callGrowRet) - - case wasm.OpcodeMiscTableCopy: - dstTableIndex := c.readI32u() - srcTableIndex := c.readI32u() - if state.unreachable { - break - } - - copySize := builder. 
- AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() - srcOffset := builder. - AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() - dstOffset := builder. - AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() - - // Out of bounds check. - dstTableInstancePtr := c.boundsCheckInTable(dstTableIndex, dstOffset, copySize) - srcTableInstancePtr := c.boundsCheckInTable(srcTableIndex, srcOffset, copySize) - - dstTableBaseAddr := c.loadTableBaseAddr(dstTableInstancePtr) - srcTableBaseAddr := c.loadTableBaseAddr(srcTableInstancePtr) - - three := builder.AllocateInstruction().AsIconst64(3).Insert(builder).Return() - - dstOffsetInBytes := builder.AllocateInstruction().AsIshl(dstOffset, three).Insert(builder).Return() - dstAddr := builder.AllocateInstruction().AsIadd(dstTableBaseAddr, dstOffsetInBytes).Insert(builder).Return() - srcOffsetInBytes := builder.AllocateInstruction().AsIshl(srcOffset, three).Insert(builder).Return() - srcAddr := builder.AllocateInstruction().AsIadd(srcTableBaseAddr, srcOffsetInBytes).Insert(builder).Return() - - copySizeInBytes := builder.AllocateInstruction().AsIshl(copySize, three).Insert(builder).Return() - c.callMemmove(dstAddr, srcAddr, copySizeInBytes) - - case wasm.OpcodeMiscMemoryCopy: - state.pc += 2 // +2 to skip two memory indexes which are fixed to zero. - if state.unreachable { - break - } - - copySize := builder. - AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() - srcOffset := builder. - AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() - dstOffset := builder. - AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() - - // Out of bounds check. - memLen := c.getMemoryLenValue(false) - c.boundsCheckInMemory(memLen, dstOffset, copySize) - c.boundsCheckInMemory(memLen, srcOffset, copySize) - - memBase := c.getMemoryBaseValue(false) - dstAddr := builder.AllocateInstruction().AsIadd(memBase, dstOffset).Insert(builder).Return() - srcAddr := builder.AllocateInstruction().AsIadd(memBase, srcOffset).Insert(builder).Return() - - c.callMemmove(dstAddr, srcAddr, copySize) - - case wasm.OpcodeMiscTableFill: - tableIndex := c.readI32u() - if state.unreachable { - break - } - fillSize := state.pop() - value := state.pop() - offset := state.pop() - - fillSizeExt := builder. - AllocateInstruction().AsUExtend(fillSize, 32, 64).Insert(builder).Return() - offsetExt := builder. - AllocateInstruction().AsUExtend(offset, 32, 64).Insert(builder).Return() - tableInstancePtr := c.boundsCheckInTable(tableIndex, offsetExt, fillSizeExt) - - three := builder.AllocateInstruction().AsIconst64(3).Insert(builder).Return() - offsetInBytes := builder.AllocateInstruction().AsIshl(offsetExt, three).Insert(builder).Return() - fillSizeInBytes := builder.AllocateInstruction().AsIshl(fillSizeExt, three).Insert(builder).Return() - - // Calculate the base address of the table. - tableBaseAddr := c.loadTableBaseAddr(tableInstancePtr) - addr := builder.AllocateInstruction().AsIadd(tableBaseAddr, offsetInBytes).Insert(builder).Return() - - // Prepare the loop and following block. - beforeLoop := builder.AllocateBasicBlock() - loopBlk := builder.AllocateBasicBlock() - loopVar := loopBlk.AddParam(builder, ssa.TypeI64) - followingBlk := builder.AllocateBasicBlock() - - // Uses the copy trick for faster filling buffer like memory.fill, but in this case we copy 8 bytes at a time. 
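For reference, the doubling-copy technique described in the comments below looks like this as plain Go over a byte slice. This is only a sketch of the idea (memory.fill applies it byte-wise, while table.fill here uses an 8-byte stride), not code the compiler emits or calls:

func fillByDoubling(buf []byte, value byte) {
	if len(buf) == 0 {
		return // mirrors the fillSize==0 branch that jumps straight to the following block
	}
	buf[0] = value
	// Each copy at most doubles the initialized prefix, so the loop runs O(log n) times.
	for i := 1; i < len(buf); i *= 2 {
		copy(buf[i:], buf[:i])
	}
}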
- // buf := memoryInst.Buffer[offset : offset+fillSize] - // buf[0:8] = value - // for i := 8; i < fillSize; i *= 2 { Begin with 8 bytes. - // copy(buf[i:], buf[:i]) - // } - - // Insert the jump to the beforeLoop block; If the fillSize is zero, then jump to the following block to skip entire logics. - zero := builder.AllocateInstruction().AsIconst64(0).Insert(builder).Return() - ifFillSizeZero := builder.AllocateInstruction().AsIcmp(fillSizeExt, zero, ssa.IntegerCmpCondEqual). - Insert(builder).Return() - builder.AllocateInstruction().AsBrnz(ifFillSizeZero, ssa.ValuesNil, followingBlk).Insert(builder) - c.insertJumpToBlock(ssa.ValuesNil, beforeLoop) - - // buf[0:8] = value - builder.SetCurrentBlock(beforeLoop) - builder.AllocateInstruction().AsStore(ssa.OpcodeStore, value, addr, 0).Insert(builder) - initValue := builder.AllocateInstruction().AsIconst64(8).Insert(builder).Return() - c.insertJumpToBlock(c.allocateVarLengthValues(1, initValue), loopBlk) - - builder.SetCurrentBlock(loopBlk) - dstAddr := builder.AllocateInstruction().AsIadd(addr, loopVar).Insert(builder).Return() - - // If loopVar*2 > fillSizeInBytes, then count must be fillSizeInBytes-loopVar. - var count ssa.Value - { - loopVarDoubled := builder.AllocateInstruction().AsIadd(loopVar, loopVar).Insert(builder).Return() - loopVarDoubledLargerThanFillSize := builder. - AllocateInstruction().AsIcmp(loopVarDoubled, fillSizeInBytes, ssa.IntegerCmpCondUnsignedGreaterThanOrEqual). - Insert(builder).Return() - diff := builder.AllocateInstruction().AsIsub(fillSizeInBytes, loopVar).Insert(builder).Return() - count = builder.AllocateInstruction().AsSelect(loopVarDoubledLargerThanFillSize, diff, loopVar).Insert(builder).Return() - } - - c.callMemmove(dstAddr, addr, count) - - shiftAmount := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return() - newLoopVar := builder.AllocateInstruction().AsIshl(loopVar, shiftAmount).Insert(builder).Return() - loopVarLessThanFillSize := builder.AllocateInstruction(). - AsIcmp(newLoopVar, fillSizeInBytes, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return() - - builder.AllocateInstruction(). - AsBrnz(loopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk). - Insert(builder) - - c.insertJumpToBlock(ssa.ValuesNil, followingBlk) - builder.SetCurrentBlock(followingBlk) - - builder.Seal(beforeLoop) - builder.Seal(loopBlk) - builder.Seal(followingBlk) - - case wasm.OpcodeMiscMemoryFill: - state.pc++ // Skip the memory index which is fixed to zero. - if state.unreachable { - break - } - - fillSize := builder. - AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() - value := state.pop() - offset := builder. - AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() - - // Out of bounds check. - c.boundsCheckInMemory(c.getMemoryLenValue(false), offset, fillSize) - - // Calculate the base address: - addr := builder.AllocateInstruction().AsIadd(c.getMemoryBaseValue(false), offset).Insert(builder).Return() - - // Uses the copy trick for faster filling buffer: https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d - // buf := memoryInst.Buffer[offset : offset+fillSize] - // buf[0] = value - // for i := 1; i < fillSize; i *= 2 { - // copy(buf[i:], buf[:i]) - // } - - // Prepare the loop and following block. 
- beforeLoop := builder.AllocateBasicBlock() - loopBlk := builder.AllocateBasicBlock() - loopVar := loopBlk.AddParam(builder, ssa.TypeI64) - followingBlk := builder.AllocateBasicBlock() - - // Insert the jump to the beforeLoop block; If the fillSize is zero, then jump to the following block to skip entire logics. - zero := builder.AllocateInstruction().AsIconst64(0).Insert(builder).Return() - ifFillSizeZero := builder.AllocateInstruction().AsIcmp(fillSize, zero, ssa.IntegerCmpCondEqual). - Insert(builder).Return() - builder.AllocateInstruction().AsBrnz(ifFillSizeZero, ssa.ValuesNil, followingBlk).Insert(builder) - c.insertJumpToBlock(ssa.ValuesNil, beforeLoop) - - // buf[0] = value - builder.SetCurrentBlock(beforeLoop) - builder.AllocateInstruction().AsStore(ssa.OpcodeIstore8, value, addr, 0).Insert(builder) - initValue := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return() - c.insertJumpToBlock(c.allocateVarLengthValues(1, initValue), loopBlk) - - builder.SetCurrentBlock(loopBlk) - dstAddr := builder.AllocateInstruction().AsIadd(addr, loopVar).Insert(builder).Return() - - // If loopVar*2 > fillSizeExt, then count must be fillSizeExt-loopVar. - var count ssa.Value - { - loopVarDoubled := builder.AllocateInstruction().AsIadd(loopVar, loopVar).Insert(builder).Return() - loopVarDoubledLargerThanFillSize := builder. - AllocateInstruction().AsIcmp(loopVarDoubled, fillSize, ssa.IntegerCmpCondUnsignedGreaterThanOrEqual). - Insert(builder).Return() - diff := builder.AllocateInstruction().AsIsub(fillSize, loopVar).Insert(builder).Return() - count = builder.AllocateInstruction().AsSelect(loopVarDoubledLargerThanFillSize, diff, loopVar).Insert(builder).Return() - } - - c.callMemmove(dstAddr, addr, count) - - shiftAmount := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return() - newLoopVar := builder.AllocateInstruction().AsIshl(loopVar, shiftAmount).Insert(builder).Return() - loopVarLessThanFillSize := builder.AllocateInstruction(). - AsIcmp(newLoopVar, fillSize, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return() - - builder.AllocateInstruction(). - AsBrnz(loopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk). - Insert(builder) - - c.insertJumpToBlock(ssa.ValuesNil, followingBlk) - builder.SetCurrentBlock(followingBlk) - - builder.Seal(beforeLoop) - builder.Seal(loopBlk) - builder.Seal(followingBlk) - - case wasm.OpcodeMiscMemoryInit: - index := c.readI32u() - state.pc++ // Skip the memory index which is fixed to zero. - if state.unreachable { - break - } - - copySize := builder. - AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() - offsetInDataInstance := builder. - AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() - offsetInMemory := builder. - AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() - - dataInstPtr := c.dataOrElementInstanceAddr(index, c.offset.DataInstances1stElement) - - // Bounds check. 
- c.boundsCheckInMemory(c.getMemoryLenValue(false), offsetInMemory, copySize) - c.boundsCheckInDataOrElementInstance(dataInstPtr, offsetInDataInstance, copySize, wazevoapi.ExitCodeMemoryOutOfBounds) - - dataInstBaseAddr := builder.AllocateInstruction().AsLoad(dataInstPtr, 0, ssa.TypeI64).Insert(builder).Return() - srcAddr := builder.AllocateInstruction().AsIadd(dataInstBaseAddr, offsetInDataInstance).Insert(builder).Return() - - memBase := c.getMemoryBaseValue(false) - dstAddr := builder.AllocateInstruction().AsIadd(memBase, offsetInMemory).Insert(builder).Return() - - c.callMemmove(dstAddr, srcAddr, copySize) - - case wasm.OpcodeMiscTableInit: - elemIndex := c.readI32u() - tableIndex := c.readI32u() - if state.unreachable { - break - } - - copySize := builder. - AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() - offsetInElementInstance := builder. - AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() - offsetInTable := builder. - AllocateInstruction().AsUExtend(state.pop(), 32, 64).Insert(builder).Return() - - elemInstPtr := c.dataOrElementInstanceAddr(elemIndex, c.offset.ElementInstances1stElement) - - // Bounds check. - tableInstancePtr := c.boundsCheckInTable(tableIndex, offsetInTable, copySize) - c.boundsCheckInDataOrElementInstance(elemInstPtr, offsetInElementInstance, copySize, wazevoapi.ExitCodeTableOutOfBounds) - - three := builder.AllocateInstruction().AsIconst64(3).Insert(builder).Return() - // Calculates the destination address in the table. - tableOffsetInBytes := builder.AllocateInstruction().AsIshl(offsetInTable, three).Insert(builder).Return() - tableBaseAddr := c.loadTableBaseAddr(tableInstancePtr) - dstAddr := builder.AllocateInstruction().AsIadd(tableBaseAddr, tableOffsetInBytes).Insert(builder).Return() - - // Calculates the source address in the element instance. - srcOffsetInBytes := builder.AllocateInstruction().AsIshl(offsetInElementInstance, three).Insert(builder).Return() - elemInstBaseAddr := builder.AllocateInstruction().AsLoad(elemInstPtr, 0, ssa.TypeI64).Insert(builder).Return() - srcAddr := builder.AllocateInstruction().AsIadd(elemInstBaseAddr, srcOffsetInBytes).Insert(builder).Return() - - copySizeInBytes := builder.AllocateInstruction().AsIshl(copySize, three).Insert(builder).Return() - c.callMemmove(dstAddr, srcAddr, copySizeInBytes) - - case wasm.OpcodeMiscElemDrop: - index := c.readI32u() - if state.unreachable { - break - } - - c.dropDataOrElementInstance(index, c.offset.ElementInstances1stElement) - - case wasm.OpcodeMiscDataDrop: - index := c.readI32u() - if state.unreachable { - break - } - c.dropDataOrElementInstance(index, c.offset.DataInstances1stElement) - - default: - panic("Unknown MiscOp " + wasm.MiscInstructionName(miscOp)) - } - - case wasm.OpcodeI32ReinterpretF32: - if state.unreachable { - break - } - reinterpret := builder.AllocateInstruction(). - AsBitcast(state.pop(), ssa.TypeI32). - Insert(builder).Return() - state.push(reinterpret) - - case wasm.OpcodeI64ReinterpretF64: - if state.unreachable { - break - } - reinterpret := builder.AllocateInstruction(). - AsBitcast(state.pop(), ssa.TypeI64). - Insert(builder).Return() - state.push(reinterpret) - - case wasm.OpcodeF32ReinterpretI32: - if state.unreachable { - break - } - reinterpret := builder.AllocateInstruction(). - AsBitcast(state.pop(), ssa.TypeF32). - Insert(builder).Return() - state.push(reinterpret) - - case wasm.OpcodeF64ReinterpretI64: - if state.unreachable { - break - } - reinterpret := builder.AllocateInstruction(). 
- AsBitcast(state.pop(), ssa.TypeF64). - Insert(builder).Return() - state.push(reinterpret) - - case wasm.OpcodeI32DivS, wasm.OpcodeI64DivS: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - result := builder.AllocateInstruction().AsSDiv(x, y, c.execCtxPtrValue).Insert(builder).Return() - state.push(result) - - case wasm.OpcodeI32DivU, wasm.OpcodeI64DivU: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - result := builder.AllocateInstruction().AsUDiv(x, y, c.execCtxPtrValue).Insert(builder).Return() - state.push(result) - - case wasm.OpcodeI32RemS, wasm.OpcodeI64RemS: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - result := builder.AllocateInstruction().AsSRem(x, y, c.execCtxPtrValue).Insert(builder).Return() - state.push(result) - - case wasm.OpcodeI32RemU, wasm.OpcodeI64RemU: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - result := builder.AllocateInstruction().AsURem(x, y, c.execCtxPtrValue).Insert(builder).Return() - state.push(result) - - case wasm.OpcodeI32And, wasm.OpcodeI64And: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - and := builder.AllocateInstruction() - and.AsBand(x, y) - builder.InsertInstruction(and) - value := and.Return() - state.push(value) - case wasm.OpcodeI32Or, wasm.OpcodeI64Or: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - or := builder.AllocateInstruction() - or.AsBor(x, y) - builder.InsertInstruction(or) - value := or.Return() - state.push(value) - case wasm.OpcodeI32Xor, wasm.OpcodeI64Xor: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - xor := builder.AllocateInstruction() - xor.AsBxor(x, y) - builder.InsertInstruction(xor) - value := xor.Return() - state.push(value) - case wasm.OpcodeI32Shl, wasm.OpcodeI64Shl: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - ishl := builder.AllocateInstruction() - ishl.AsIshl(x, y) - builder.InsertInstruction(ishl) - value := ishl.Return() - state.push(value) - case wasm.OpcodeI32ShrU, wasm.OpcodeI64ShrU: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - ishl := builder.AllocateInstruction() - ishl.AsUshr(x, y) - builder.InsertInstruction(ishl) - value := ishl.Return() - state.push(value) - case wasm.OpcodeI32ShrS, wasm.OpcodeI64ShrS: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - ishl := builder.AllocateInstruction() - ishl.AsSshr(x, y) - builder.InsertInstruction(ishl) - value := ishl.Return() - state.push(value) - case wasm.OpcodeI32Rotl, wasm.OpcodeI64Rotl: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - rotl := builder.AllocateInstruction() - rotl.AsRotl(x, y) - builder.InsertInstruction(rotl) - value := rotl.Return() - state.push(value) - case wasm.OpcodeI32Rotr, wasm.OpcodeI64Rotr: - if state.unreachable { - break - } - y, x := state.pop(), state.pop() - rotr := builder.AllocateInstruction() - rotr.AsRotr(x, y) - builder.InsertInstruction(rotr) - value := rotr.Return() - state.push(value) - case wasm.OpcodeI32Clz, wasm.OpcodeI64Clz: - if state.unreachable { - break - } - x := state.pop() - clz := builder.AllocateInstruction() - clz.AsClz(x) - builder.InsertInstruction(clz) - value := clz.Return() - state.push(value) - case wasm.OpcodeI32Ctz, wasm.OpcodeI64Ctz: - if state.unreachable { - break - } - x := state.pop() - ctz := builder.AllocateInstruction() - ctz.AsCtz(x) - builder.InsertInstruction(ctz) - value := ctz.Return() - 
state.push(value) - case wasm.OpcodeI32Popcnt, wasm.OpcodeI64Popcnt: - if state.unreachable { - break - } - x := state.pop() - popcnt := builder.AllocateInstruction() - popcnt.AsPopcnt(x) - builder.InsertInstruction(popcnt) - value := popcnt.Return() - state.push(value) - - case wasm.OpcodeI32WrapI64: - if state.unreachable { - break - } - x := state.pop() - wrap := builder.AllocateInstruction().AsIreduce(x, ssa.TypeI32).Insert(builder).Return() - state.push(wrap) - case wasm.OpcodeGlobalGet: - index := c.readI32u() - if state.unreachable { - break - } - v := c.getWasmGlobalValue(index, false) - state.push(v) - case wasm.OpcodeGlobalSet: - index := c.readI32u() - if state.unreachable { - break - } - v := state.pop() - c.setWasmGlobalValue(index, v) - case wasm.OpcodeLocalGet: - index := c.readI32u() - if state.unreachable { - break - } - variable := c.localVariable(index) - state.push(builder.MustFindValue(variable)) - - case wasm.OpcodeLocalSet: - index := c.readI32u() - if state.unreachable { - break - } - variable := c.localVariable(index) - newValue := state.pop() - builder.DefineVariableInCurrentBB(variable, newValue) - - case wasm.OpcodeLocalTee: - index := c.readI32u() - if state.unreachable { - break - } - variable := c.localVariable(index) - newValue := state.peek() - builder.DefineVariableInCurrentBB(variable, newValue) - - case wasm.OpcodeSelect, wasm.OpcodeTypedSelect: - if op == wasm.OpcodeTypedSelect { - state.pc += 2 // ignores the type which is only needed during validation. - } - - if state.unreachable { - break - } - - cond := state.pop() - v2 := state.pop() - v1 := state.pop() - - sl := builder.AllocateInstruction(). - AsSelect(cond, v1, v2). - Insert(builder). - Return() - state.push(sl) - - case wasm.OpcodeMemorySize: - state.pc++ // skips the memory index. - if state.unreachable { - break - } - - var memSizeInBytes ssa.Value - if c.offset.LocalMemoryBegin < 0 { - memInstPtr := builder.AllocateInstruction(). - AsLoad(c.moduleCtxPtrValue, c.offset.ImportedMemoryBegin.U32(), ssa.TypeI64). - Insert(builder). - Return() - - memSizeInBytes = builder.AllocateInstruction(). - AsLoad(memInstPtr, memoryInstanceBufSizeOffset, ssa.TypeI32). - Insert(builder). - Return() - } else { - memSizeInBytes = builder.AllocateInstruction(). - AsLoad(c.moduleCtxPtrValue, c.offset.LocalMemoryLen().U32(), ssa.TypeI32). - Insert(builder). - Return() - } - - amount := builder.AllocateInstruction() - amount.AsIconst32(uint32(wasm.MemoryPageSizeInBits)) - builder.InsertInstruction(amount) - memSize := builder.AllocateInstruction(). - AsUshr(memSizeInBytes, amount.Return()). - Insert(builder). - Return() - state.push(memSize) - - case wasm.OpcodeMemoryGrow: - state.pc++ // skips the memory index. - if state.unreachable { - break - } - - c.storeCallerModuleContext() - - pages := state.pop() - memoryGrowPtr := builder.AllocateInstruction(). - AsLoad(c.execCtxPtrValue, - wazevoapi.ExecutionContextOffsetMemoryGrowTrampolineAddress.U32(), - ssa.TypeI64, - ).Insert(builder).Return() - - args := c.allocateVarLengthValues(1, c.execCtxPtrValue, pages) - callGrowRet := builder. - AllocateInstruction(). - AsCallIndirect(memoryGrowPtr, &c.memoryGrowSig, args). - Insert(builder).Return() - state.push(callGrowRet) - - // After the memory grow, reload the cached memory base and len. 
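			// (A successful memory.grow may reallocate or move the underlying buffer,
			// which would leave the previously loaded base pointer and length stale.)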
- c.reloadMemoryBaseLen() - - case wasm.OpcodeI32Store, - wasm.OpcodeI64Store, - wasm.OpcodeF32Store, - wasm.OpcodeF64Store, - wasm.OpcodeI32Store8, - wasm.OpcodeI32Store16, - wasm.OpcodeI64Store8, - wasm.OpcodeI64Store16, - wasm.OpcodeI64Store32: - - _, offset := c.readMemArg() - if state.unreachable { - break - } - var opSize uint64 - var opcode ssa.Opcode - switch op { - case wasm.OpcodeI32Store, wasm.OpcodeF32Store: - opcode = ssa.OpcodeStore - opSize = 4 - case wasm.OpcodeI64Store, wasm.OpcodeF64Store: - opcode = ssa.OpcodeStore - opSize = 8 - case wasm.OpcodeI32Store8, wasm.OpcodeI64Store8: - opcode = ssa.OpcodeIstore8 - opSize = 1 - case wasm.OpcodeI32Store16, wasm.OpcodeI64Store16: - opcode = ssa.OpcodeIstore16 - opSize = 2 - case wasm.OpcodeI64Store32: - opcode = ssa.OpcodeIstore32 - opSize = 4 - default: - panic("BUG") - } - - value := state.pop() - baseAddr := state.pop() - addr := c.memOpSetup(baseAddr, uint64(offset), opSize) - builder.AllocateInstruction(). - AsStore(opcode, value, addr, offset). - Insert(builder) - - case wasm.OpcodeI32Load, - wasm.OpcodeI64Load, - wasm.OpcodeF32Load, - wasm.OpcodeF64Load, - wasm.OpcodeI32Load8S, - wasm.OpcodeI32Load8U, - wasm.OpcodeI32Load16S, - wasm.OpcodeI32Load16U, - wasm.OpcodeI64Load8S, - wasm.OpcodeI64Load8U, - wasm.OpcodeI64Load16S, - wasm.OpcodeI64Load16U, - wasm.OpcodeI64Load32S, - wasm.OpcodeI64Load32U: - _, offset := c.readMemArg() - if state.unreachable { - break - } - - var opSize uint64 - switch op { - case wasm.OpcodeI32Load, wasm.OpcodeF32Load: - opSize = 4 - case wasm.OpcodeI64Load, wasm.OpcodeF64Load: - opSize = 8 - case wasm.OpcodeI32Load8S, wasm.OpcodeI32Load8U: - opSize = 1 - case wasm.OpcodeI32Load16S, wasm.OpcodeI32Load16U: - opSize = 2 - case wasm.OpcodeI64Load8S, wasm.OpcodeI64Load8U: - opSize = 1 - case wasm.OpcodeI64Load16S, wasm.OpcodeI64Load16U: - opSize = 2 - case wasm.OpcodeI64Load32S, wasm.OpcodeI64Load32U: - opSize = 4 - default: - panic("BUG") - } - - baseAddr := state.pop() - addr := c.memOpSetup(baseAddr, uint64(offset), opSize) - load := builder.AllocateInstruction() - switch op { - case wasm.OpcodeI32Load: - load.AsLoad(addr, offset, ssa.TypeI32) - case wasm.OpcodeI64Load: - load.AsLoad(addr, offset, ssa.TypeI64) - case wasm.OpcodeF32Load: - load.AsLoad(addr, offset, ssa.TypeF32) - case wasm.OpcodeF64Load: - load.AsLoad(addr, offset, ssa.TypeF64) - case wasm.OpcodeI32Load8S: - load.AsExtLoad(ssa.OpcodeSload8, addr, offset, false) - case wasm.OpcodeI32Load8U: - load.AsExtLoad(ssa.OpcodeUload8, addr, offset, false) - case wasm.OpcodeI32Load16S: - load.AsExtLoad(ssa.OpcodeSload16, addr, offset, false) - case wasm.OpcodeI32Load16U: - load.AsExtLoad(ssa.OpcodeUload16, addr, offset, false) - case wasm.OpcodeI64Load8S: - load.AsExtLoad(ssa.OpcodeSload8, addr, offset, true) - case wasm.OpcodeI64Load8U: - load.AsExtLoad(ssa.OpcodeUload8, addr, offset, true) - case wasm.OpcodeI64Load16S: - load.AsExtLoad(ssa.OpcodeSload16, addr, offset, true) - case wasm.OpcodeI64Load16U: - load.AsExtLoad(ssa.OpcodeUload16, addr, offset, true) - case wasm.OpcodeI64Load32S: - load.AsExtLoad(ssa.OpcodeSload32, addr, offset, true) - case wasm.OpcodeI64Load32U: - load.AsExtLoad(ssa.OpcodeUload32, addr, offset, true) - default: - panic("BUG") - } - builder.InsertInstruction(load) - state.push(load.Return()) - case wasm.OpcodeBlock: - // Note: we do not need to create a BB for this as that would always have only one predecessor - // which is the current BB, and therefore it's always ok to merge them in any way. 
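			// As a rough illustration, for a fragment like
			//
			//	(block (result i32)
			//	  ...
			//	  br 0
			//	)
			//
			// the body keeps lowering into the current basic block; only followingBlk,
			// allocated below with one i32 block parameter for the result, is needed as
			// the target of `br 0` and of the block's `end`.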
- - bt := c.readBlockType() - - if state.unreachable { - state.unreachableDepth++ - break - } - - followingBlk := builder.AllocateBasicBlock() - c.addBlockParamsFromWasmTypes(bt.Results, followingBlk) - - state.ctrlPush(controlFrame{ - kind: controlFrameKindBlock, - originalStackLenWithoutParam: len(state.values) - len(bt.Params), - followingBlock: followingBlk, - blockType: bt, - }) - case wasm.OpcodeLoop: - bt := c.readBlockType() - - if state.unreachable { - state.unreachableDepth++ - break - } - - loopHeader, afterLoopBlock := builder.AllocateBasicBlock(), builder.AllocateBasicBlock() - c.addBlockParamsFromWasmTypes(bt.Params, loopHeader) - c.addBlockParamsFromWasmTypes(bt.Results, afterLoopBlock) - - originalLen := len(state.values) - len(bt.Params) - state.ctrlPush(controlFrame{ - originalStackLenWithoutParam: originalLen, - kind: controlFrameKindLoop, - blk: loopHeader, - followingBlock: afterLoopBlock, - blockType: bt, - }) - - args := c.allocateVarLengthValues(originalLen) - args = args.Append(builder.VarLengthPool(), state.values[originalLen:]...) - - // Insert the jump to the header of loop. - br := builder.AllocateInstruction() - br.AsJump(args, loopHeader) - builder.InsertInstruction(br) - - c.switchTo(originalLen, loopHeader) - - if c.ensureTermination { - checkModuleExitCodePtr := builder.AllocateInstruction(). - AsLoad(c.execCtxPtrValue, - wazevoapi.ExecutionContextOffsetCheckModuleExitCodeTrampolineAddress.U32(), - ssa.TypeI64, - ).Insert(builder).Return() - - args := c.allocateVarLengthValues(1, c.execCtxPtrValue) - builder.AllocateInstruction(). - AsCallIndirect(checkModuleExitCodePtr, &c.checkModuleExitCodeSig, args). - Insert(builder) - } - case wasm.OpcodeIf: - bt := c.readBlockType() - - if state.unreachable { - state.unreachableDepth++ - break - } - - v := state.pop() - thenBlk, elseBlk, followingBlk := builder.AllocateBasicBlock(), builder.AllocateBasicBlock(), builder.AllocateBasicBlock() - - // We do not make the Wasm-level block parameters as SSA-level block params for if-else blocks - // since they won't be PHI and the definition is unique. - - // On the other hand, the following block after if-else-end will likely have - // multiple definitions (one in Then and another in Else blocks). - c.addBlockParamsFromWasmTypes(bt.Results, followingBlk) - - args := c.allocateVarLengthValues(len(bt.Params)) - args = args.Append(builder.VarLengthPool(), state.values[len(state.values)-len(bt.Params):]...) - - // Insert the conditional jump to the Else block. - brz := builder.AllocateInstruction() - brz.AsBrz(v, ssa.ValuesNil, elseBlk) - builder.InsertInstruction(brz) - - // Then, insert the jump to the Then block. - br := builder.AllocateInstruction() - br.AsJump(ssa.ValuesNil, thenBlk) - builder.InsertInstruction(br) - - state.ctrlPush(controlFrame{ - kind: controlFrameKindIfWithoutElse, - originalStackLenWithoutParam: len(state.values) - len(bt.Params), - blk: elseBlk, - followingBlock: followingBlk, - blockType: bt, - clonedArgs: args, - }) - - builder.SetCurrentBlock(thenBlk) - - // Then and Else (if exists) have only one predecessor. - builder.Seal(thenBlk) - builder.Seal(elseBlk) - case wasm.OpcodeElse: - ifctrl := state.ctrlPeekAt(0) - if unreachable := state.unreachable; unreachable && state.unreachableDepth > 0 { - // If it is currently in unreachable and is a nested if, - // we just remove the entire else block. 
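				// Nothing is emitted and unreachableDepth is left untouched; the matching
				// End opcode decrements it, so the whole nested if/else is skipped.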
- break - } - - ifctrl.kind = controlFrameKindIfWithElse - if !state.unreachable { - // If this Then block is currently reachable, we have to insert the branching to the following BB. - followingBlk := ifctrl.followingBlock // == the BB after if-then-else. - args := c.nPeekDup(len(ifctrl.blockType.Results)) - c.insertJumpToBlock(args, followingBlk) - } else { - state.unreachable = false - } - - // Reset the stack so that we can correctly handle the else block. - state.values = state.values[:ifctrl.originalStackLenWithoutParam] - elseBlk := ifctrl.blk - for _, arg := range ifctrl.clonedArgs.View() { - state.push(arg) - } - - builder.SetCurrentBlock(elseBlk) - - case wasm.OpcodeEnd: - if state.unreachableDepth > 0 { - state.unreachableDepth-- - break - } - - ctrl := state.ctrlPop() - followingBlk := ctrl.followingBlock - - unreachable := state.unreachable - if !unreachable { - // Top n-th args will be used as a result of the current control frame. - args := c.nPeekDup(len(ctrl.blockType.Results)) - - // Insert the unconditional branch to the target. - c.insertJumpToBlock(args, followingBlk) - } else { // recover from the unreachable state. - state.unreachable = false - } - - switch ctrl.kind { - case controlFrameKindFunction: - break // This is the very end of function. - case controlFrameKindLoop: - // Loop header block can be reached from any br/br_table contained in the loop, - // so now that we've reached End of it, we can seal it. - builder.Seal(ctrl.blk) - case controlFrameKindIfWithoutElse: - // If this is the end of Then block, we have to emit the empty Else block. - elseBlk := ctrl.blk - builder.SetCurrentBlock(elseBlk) - c.insertJumpToBlock(ctrl.clonedArgs, followingBlk) - } - - builder.Seal(followingBlk) - - // Ready to start translating the following block. - c.switchTo(ctrl.originalStackLenWithoutParam, followingBlk) - - case wasm.OpcodeBr: - labelIndex := c.readI32u() - if state.unreachable { - break - } - - targetBlk, argNum := state.brTargetArgNumFor(labelIndex) - args := c.nPeekDup(argNum) - c.insertJumpToBlock(args, targetBlk) - - state.unreachable = true - - case wasm.OpcodeBrIf: - labelIndex := c.readI32u() - if state.unreachable { - break - } - - v := state.pop() - - targetBlk, argNum := state.brTargetArgNumFor(labelIndex) - args := c.nPeekDup(argNum) - var sealTargetBlk bool - if c.needListener && targetBlk.ReturnBlock() { // In this case, we have to call the listener before returning. - // Save the currently active block. - current := builder.CurrentBlock() - - // Allocate the trampoline block to the return where we call the listener. - targetBlk = builder.AllocateBasicBlock() - builder.SetCurrentBlock(targetBlk) - sealTargetBlk = true - - c.callListenerAfter() - - instr := builder.AllocateInstruction() - instr.AsReturn(args) - builder.InsertInstruction(instr) - - args = ssa.ValuesNil - - // Revert the current block. - builder.SetCurrentBlock(current) - } - - // Insert the conditional jump to the target block. - brnz := builder.AllocateInstruction() - brnz.AsBrnz(v, args, targetBlk) - builder.InsertInstruction(brnz) - - if sealTargetBlk { - builder.Seal(targetBlk) - } - - // Insert the unconditional jump to the Else block which corresponds to after br_if. - elseBlk := builder.AllocateBasicBlock() - c.insertJumpToBlock(ssa.ValuesNil, elseBlk) - - // Now start translating the instructions after br_if. - builder.Seal(elseBlk) // Else of br_if has the current block as the only one successor. 
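			// Note that elseBlk carries no block parameters: nPeekDup above only peeks at
			// the value stack, so the values flowing past br_if stay on the stack and are
			// used directly as lowering continues in elseBlk.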
- builder.SetCurrentBlock(elseBlk) - - case wasm.OpcodeBrTable: - labels := state.tmpForBrTable[:0] - labelCount := c.readI32u() - for i := 0; i < int(labelCount); i++ { - labels = append(labels, c.readI32u()) - } - labels = append(labels, c.readI32u()) // default label. - if state.unreachable { - break - } - - index := state.pop() - if labelCount == 0 { // If this br_table is empty, we can just emit the unconditional jump. - targetBlk, argNum := state.brTargetArgNumFor(labels[0]) - args := c.nPeekDup(argNum) - c.insertJumpToBlock(args, targetBlk) - } else { - c.lowerBrTable(labels, index) - } - state.tmpForBrTable = labels // reuse the temporary slice for next use. - state.unreachable = true - - case wasm.OpcodeNop: - case wasm.OpcodeReturn: - if state.unreachable { - break - } - if c.needListener { - c.callListenerAfter() - } - - results := c.nPeekDup(c.results()) - instr := builder.AllocateInstruction() - - instr.AsReturn(results) - builder.InsertInstruction(instr) - state.unreachable = true - - case wasm.OpcodeUnreachable: - if state.unreachable { - break - } - exit := builder.AllocateInstruction() - exit.AsExitWithCode(c.execCtxPtrValue, wazevoapi.ExitCodeUnreachable) - builder.InsertInstruction(exit) - state.unreachable = true - - case wasm.OpcodeCallIndirect: - typeIndex := c.readI32u() - tableIndex := c.readI32u() - if state.unreachable { - break - } - c.lowerCallIndirect(typeIndex, tableIndex) - - case wasm.OpcodeCall: - fnIndex := c.readI32u() - if state.unreachable { - break - } - - var typIndex wasm.Index - if fnIndex < c.m.ImportFunctionCount { - // Before transfer the control to the callee, we have to store the current module's moduleContextPtr - // into execContext.callerModuleContextPtr in case when the callee is a Go function. - c.storeCallerModuleContext() - var fi int - for i := range c.m.ImportSection { - imp := &c.m.ImportSection[i] - if imp.Type == wasm.ExternTypeFunc { - if fi == int(fnIndex) { - typIndex = imp.DescFunc - break - } - fi++ - } - } - } else { - typIndex = c.m.FunctionSection[fnIndex-c.m.ImportFunctionCount] - } - typ := &c.m.TypeSection[typIndex] - - argN := len(typ.Params) - tail := len(state.values) - argN - vs := state.values[tail:] - state.values = state.values[:tail] - args := c.allocateVarLengthValues(2+len(vs), c.execCtxPtrValue) - - sig := c.signatures[typ] - call := builder.AllocateInstruction() - if fnIndex >= c.m.ImportFunctionCount { - args = args.Append(builder.VarLengthPool(), c.moduleCtxPtrValue) // This case the callee module is itself. - args = args.Append(builder.VarLengthPool(), vs...) - call.AsCall(FunctionIndexToFuncRef(fnIndex), sig, args) - builder.InsertInstruction(call) - } else { - // This case we have to read the address of the imported function from the module context. - moduleCtx := c.moduleCtxPtrValue - loadFuncPtr, loadModuleCtxPtr := builder.AllocateInstruction(), builder.AllocateInstruction() - funcPtrOffset, moduleCtxPtrOffset, _ := c.offset.ImportedFunctionOffset(fnIndex) - loadFuncPtr.AsLoad(moduleCtx, funcPtrOffset.U32(), ssa.TypeI64) - loadModuleCtxPtr.AsLoad(moduleCtx, moduleCtxPtrOffset.U32(), ssa.TypeI64) - builder.InsertInstruction(loadFuncPtr) - builder.InsertInstruction(loadModuleCtxPtr) - - args = args.Append(builder.VarLengthPool(), loadModuleCtxPtr.Return()) - args = args.Append(builder.VarLengthPool(), vs...) 
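			// The imported function is invoked indirectly through the code pointer loaded
			// from the module context, with the callee's own module context passed in the
			// same position that the direct-call path passes c.moduleCtxPtrValue.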
- call.AsCallIndirect(loadFuncPtr.Return(), sig, args) - builder.InsertInstruction(call) - } - - first, rest := call.Returns() - if first.Valid() { - state.push(first) - } - for _, v := range rest { - state.push(v) - } - - c.reloadAfterCall() - - case wasm.OpcodeDrop: - if state.unreachable { - break - } - _ = state.pop() - case wasm.OpcodeF64ConvertI32S, wasm.OpcodeF64ConvertI64S, wasm.OpcodeF64ConvertI32U, wasm.OpcodeF64ConvertI64U: - if state.unreachable { - break - } - result := builder.AllocateInstruction().AsFcvtFromInt( - state.pop(), - op == wasm.OpcodeF64ConvertI32S || op == wasm.OpcodeF64ConvertI64S, - true, - ).Insert(builder).Return() - state.push(result) - case wasm.OpcodeF32ConvertI32S, wasm.OpcodeF32ConvertI64S, wasm.OpcodeF32ConvertI32U, wasm.OpcodeF32ConvertI64U: - if state.unreachable { - break - } - result := builder.AllocateInstruction().AsFcvtFromInt( - state.pop(), - op == wasm.OpcodeF32ConvertI32S || op == wasm.OpcodeF32ConvertI64S, - false, - ).Insert(builder).Return() - state.push(result) - case wasm.OpcodeF32DemoteF64: - if state.unreachable { - break - } - cvt := builder.AllocateInstruction() - cvt.AsFdemote(state.pop()) - builder.InsertInstruction(cvt) - state.push(cvt.Return()) - case wasm.OpcodeF64PromoteF32: - if state.unreachable { - break - } - cvt := builder.AllocateInstruction() - cvt.AsFpromote(state.pop()) - builder.InsertInstruction(cvt) - state.push(cvt.Return()) - - case wasm.OpcodeVecPrefix: - state.pc++ - vecOp := c.wasmFunctionBody[state.pc] - switch vecOp { - case wasm.OpcodeVecV128Const: - state.pc++ - lo := binary.LittleEndian.Uint64(c.wasmFunctionBody[state.pc:]) - state.pc += 8 - hi := binary.LittleEndian.Uint64(c.wasmFunctionBody[state.pc:]) - state.pc += 7 - if state.unreachable { - break - } - ret := builder.AllocateInstruction().AsVconst(lo, hi).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecV128Load: - _, offset := c.readMemArg() - if state.unreachable { - break - } - baseAddr := state.pop() - addr := c.memOpSetup(baseAddr, uint64(offset), 16) - load := builder.AllocateInstruction() - load.AsLoad(addr, offset, ssa.TypeV128) - builder.InsertInstruction(load) - state.push(load.Return()) - case wasm.OpcodeVecV128Load8Lane, wasm.OpcodeVecV128Load16Lane, wasm.OpcodeVecV128Load32Lane: - _, offset := c.readMemArg() - state.pc++ - if state.unreachable { - break - } - var lane ssa.VecLane - var loadOp ssa.Opcode - var opSize uint64 - switch vecOp { - case wasm.OpcodeVecV128Load8Lane: - loadOp, lane, opSize = ssa.OpcodeUload8, ssa.VecLaneI8x16, 1 - case wasm.OpcodeVecV128Load16Lane: - loadOp, lane, opSize = ssa.OpcodeUload16, ssa.VecLaneI16x8, 2 - case wasm.OpcodeVecV128Load32Lane: - loadOp, lane, opSize = ssa.OpcodeUload32, ssa.VecLaneI32x4, 4 - } - laneIndex := c.wasmFunctionBody[state.pc] - vector := state.pop() - baseAddr := state.pop() - addr := c.memOpSetup(baseAddr, uint64(offset), opSize) - load := builder.AllocateInstruction(). - AsExtLoad(loadOp, addr, offset, false). - Insert(builder).Return() - ret := builder.AllocateInstruction(). - AsInsertlane(vector, load, laneIndex, lane). - Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecV128Load64Lane: - _, offset := c.readMemArg() - state.pc++ - if state.unreachable { - break - } - laneIndex := c.wasmFunctionBody[state.pc] - vector := state.pop() - baseAddr := state.pop() - addr := c.memOpSetup(baseAddr, uint64(offset), 8) - load := builder.AllocateInstruction(). - AsLoad(addr, offset, ssa.TypeI64). 
- Insert(builder).Return() - ret := builder.AllocateInstruction(). - AsInsertlane(vector, load, laneIndex, ssa.VecLaneI64x2). - Insert(builder).Return() - state.push(ret) - - case wasm.OpcodeVecV128Load32zero, wasm.OpcodeVecV128Load64zero: - _, offset := c.readMemArg() - if state.unreachable { - break - } - - var scalarType ssa.Type - switch vecOp { - case wasm.OpcodeVecV128Load32zero: - scalarType = ssa.TypeF32 - case wasm.OpcodeVecV128Load64zero: - scalarType = ssa.TypeF64 - } - - baseAddr := state.pop() - addr := c.memOpSetup(baseAddr, uint64(offset), uint64(scalarType.Size())) - - ret := builder.AllocateInstruction(). - AsVZeroExtLoad(addr, offset, scalarType). - Insert(builder).Return() - state.push(ret) - - case wasm.OpcodeVecV128Load8x8u, wasm.OpcodeVecV128Load8x8s, - wasm.OpcodeVecV128Load16x4u, wasm.OpcodeVecV128Load16x4s, - wasm.OpcodeVecV128Load32x2u, wasm.OpcodeVecV128Load32x2s: - _, offset := c.readMemArg() - if state.unreachable { - break - } - var lane ssa.VecLane - var signed bool - switch vecOp { - case wasm.OpcodeVecV128Load8x8s: - signed = true - fallthrough - case wasm.OpcodeVecV128Load8x8u: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecV128Load16x4s: - signed = true - fallthrough - case wasm.OpcodeVecV128Load16x4u: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecV128Load32x2s: - signed = true - fallthrough - case wasm.OpcodeVecV128Load32x2u: - lane = ssa.VecLaneI32x4 - } - baseAddr := state.pop() - addr := c.memOpSetup(baseAddr, uint64(offset), 8) - load := builder.AllocateInstruction(). - AsLoad(addr, offset, ssa.TypeF64). - Insert(builder).Return() - ret := builder.AllocateInstruction(). - AsWiden(load, lane, signed, true). - Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecV128Load8Splat, wasm.OpcodeVecV128Load16Splat, - wasm.OpcodeVecV128Load32Splat, wasm.OpcodeVecV128Load64Splat: - _, offset := c.readMemArg() - if state.unreachable { - break - } - var lane ssa.VecLane - var opSize uint64 - switch vecOp { - case wasm.OpcodeVecV128Load8Splat: - lane, opSize = ssa.VecLaneI8x16, 1 - case wasm.OpcodeVecV128Load16Splat: - lane, opSize = ssa.VecLaneI16x8, 2 - case wasm.OpcodeVecV128Load32Splat: - lane, opSize = ssa.VecLaneI32x4, 4 - case wasm.OpcodeVecV128Load64Splat: - lane, opSize = ssa.VecLaneI64x2, 8 - } - baseAddr := state.pop() - addr := c.memOpSetup(baseAddr, uint64(offset), opSize) - ret := builder.AllocateInstruction(). - AsLoadSplat(addr, offset, lane). - Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecV128Store: - _, offset := c.readMemArg() - if state.unreachable { - break - } - value := state.pop() - baseAddr := state.pop() - addr := c.memOpSetup(baseAddr, uint64(offset), 16) - builder.AllocateInstruction(). - AsStore(ssa.OpcodeStore, value, addr, offset). 
- Insert(builder) - case wasm.OpcodeVecV128Store8Lane, wasm.OpcodeVecV128Store16Lane, - wasm.OpcodeVecV128Store32Lane, wasm.OpcodeVecV128Store64Lane: - _, offset := c.readMemArg() - state.pc++ - if state.unreachable { - break - } - laneIndex := c.wasmFunctionBody[state.pc] - var storeOp ssa.Opcode - var lane ssa.VecLane - var opSize uint64 - switch vecOp { - case wasm.OpcodeVecV128Store8Lane: - storeOp, lane, opSize = ssa.OpcodeIstore8, ssa.VecLaneI8x16, 1 - case wasm.OpcodeVecV128Store16Lane: - storeOp, lane, opSize = ssa.OpcodeIstore16, ssa.VecLaneI16x8, 2 - case wasm.OpcodeVecV128Store32Lane: - storeOp, lane, opSize = ssa.OpcodeIstore32, ssa.VecLaneI32x4, 4 - case wasm.OpcodeVecV128Store64Lane: - storeOp, lane, opSize = ssa.OpcodeStore, ssa.VecLaneI64x2, 8 - } - vector := state.pop() - baseAddr := state.pop() - addr := c.memOpSetup(baseAddr, uint64(offset), opSize) - value := builder.AllocateInstruction(). - AsExtractlane(vector, laneIndex, lane, false). - Insert(builder).Return() - builder.AllocateInstruction(). - AsStore(storeOp, value, addr, offset). - Insert(builder) - case wasm.OpcodeVecV128Not: - if state.unreachable { - break - } - v1 := state.pop() - ret := builder.AllocateInstruction().AsVbnot(v1).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecV128And: - if state.unreachable { - break - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVband(v1, v2).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecV128AndNot: - if state.unreachable { - break - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVbandnot(v1, v2).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecV128Or: - if state.unreachable { - break - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVbor(v1, v2).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecV128Xor: - if state.unreachable { - break - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVbxor(v1, v2).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecV128Bitselect: - if state.unreachable { - break - } - c := state.pop() - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVbitselect(c, v1, v2).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecV128AnyTrue: - if state.unreachable { - break - } - v1 := state.pop() - ret := builder.AllocateInstruction().AsVanyTrue(v1).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16AllTrue, wasm.OpcodeVecI16x8AllTrue, wasm.OpcodeVecI32x4AllTrue, wasm.OpcodeVecI64x2AllTrue: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16AllTrue: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8AllTrue: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4AllTrue: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2AllTrue: - lane = ssa.VecLaneI64x2 - } - v1 := state.pop() - ret := builder.AllocateInstruction().AsVallTrue(v1, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16BitMask, wasm.OpcodeVecI16x8BitMask, wasm.OpcodeVecI32x4BitMask, wasm.OpcodeVecI64x2BitMask: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16BitMask: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8BitMask: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4BitMask: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2BitMask: - lane = ssa.VecLaneI64x2 - } - v1 := state.pop() - 
ret := builder.AllocateInstruction().AsVhighBits(v1, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16Abs, wasm.OpcodeVecI16x8Abs, wasm.OpcodeVecI32x4Abs, wasm.OpcodeVecI64x2Abs: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16Abs: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8Abs: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4Abs: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2Abs: - lane = ssa.VecLaneI64x2 - } - v1 := state.pop() - ret := builder.AllocateInstruction().AsVIabs(v1, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16Neg, wasm.OpcodeVecI16x8Neg, wasm.OpcodeVecI32x4Neg, wasm.OpcodeVecI64x2Neg: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16Neg: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8Neg: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4Neg: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2Neg: - lane = ssa.VecLaneI64x2 - } - v1 := state.pop() - ret := builder.AllocateInstruction().AsVIneg(v1, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16Popcnt: - if state.unreachable { - break - } - lane := ssa.VecLaneI8x16 - v1 := state.pop() - - ret := builder.AllocateInstruction().AsVIpopcnt(v1, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16Add, wasm.OpcodeVecI16x8Add, wasm.OpcodeVecI32x4Add, wasm.OpcodeVecI64x2Add: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16Add: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8Add: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4Add: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2Add: - lane = ssa.VecLaneI64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVIadd(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16AddSatS, wasm.OpcodeVecI16x8AddSatS: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16AddSatS: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8AddSatS: - lane = ssa.VecLaneI16x8 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVSaddSat(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16AddSatU, wasm.OpcodeVecI16x8AddSatU: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16AddSatU: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8AddSatU: - lane = ssa.VecLaneI16x8 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVUaddSat(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16SubSatS, wasm.OpcodeVecI16x8SubSatS: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16SubSatS: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8SubSatS: - lane = ssa.VecLaneI16x8 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVSsubSat(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16SubSatU, wasm.OpcodeVecI16x8SubSatU: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16SubSatU: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8SubSatU: - lane = ssa.VecLaneI16x8 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVUsubSat(v1, v2, 
lane).Insert(builder).Return() - state.push(ret) - - case wasm.OpcodeVecI8x16Sub, wasm.OpcodeVecI16x8Sub, wasm.OpcodeVecI32x4Sub, wasm.OpcodeVecI64x2Sub: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16Sub: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8Sub: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4Sub: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2Sub: - lane = ssa.VecLaneI64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVIsub(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16MinS, wasm.OpcodeVecI16x8MinS, wasm.OpcodeVecI32x4MinS: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16MinS: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8MinS: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4MinS: - lane = ssa.VecLaneI32x4 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVImin(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16MinU, wasm.OpcodeVecI16x8MinU, wasm.OpcodeVecI32x4MinU: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16MinU: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8MinU: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4MinU: - lane = ssa.VecLaneI32x4 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVUmin(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16MaxS, wasm.OpcodeVecI16x8MaxS, wasm.OpcodeVecI32x4MaxS: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16MaxS: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8MaxS: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4MaxS: - lane = ssa.VecLaneI32x4 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVImax(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16MaxU, wasm.OpcodeVecI16x8MaxU, wasm.OpcodeVecI32x4MaxU: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16MaxU: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8MaxU: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4MaxU: - lane = ssa.VecLaneI32x4 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVUmax(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16AvgrU, wasm.OpcodeVecI16x8AvgrU: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16AvgrU: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8AvgrU: - lane = ssa.VecLaneI16x8 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVAvgRound(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI16x8Mul, wasm.OpcodeVecI32x4Mul, wasm.OpcodeVecI64x2Mul: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI16x8Mul: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4Mul: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2Mul: - lane = ssa.VecLaneI64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVImul(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI16x8Q15mulrSatS: - if state.unreachable { - break - } - v2 := state.pop() - v1 := state.pop() - ret := 
builder.AllocateInstruction().AsSqmulRoundSat(v1, v2, ssa.VecLaneI16x8).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16Eq, wasm.OpcodeVecI16x8Eq, wasm.OpcodeVecI32x4Eq, wasm.OpcodeVecI64x2Eq: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16Eq: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8Eq: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4Eq: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2Eq: - lane = ssa.VecLaneI64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsVIcmp(v1, v2, ssa.IntegerCmpCondEqual, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16Ne, wasm.OpcodeVecI16x8Ne, wasm.OpcodeVecI32x4Ne, wasm.OpcodeVecI64x2Ne: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16Ne: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8Ne: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4Ne: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2Ne: - lane = ssa.VecLaneI64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsVIcmp(v1, v2, ssa.IntegerCmpCondNotEqual, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16LtS, wasm.OpcodeVecI16x8LtS, wasm.OpcodeVecI32x4LtS, wasm.OpcodeVecI64x2LtS: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16LtS: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8LtS: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4LtS: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2LtS: - lane = ssa.VecLaneI64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsVIcmp(v1, v2, ssa.IntegerCmpCondSignedLessThan, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16LtU, wasm.OpcodeVecI16x8LtU, wasm.OpcodeVecI32x4LtU: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16LtU: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8LtU: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4LtU: - lane = ssa.VecLaneI32x4 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsVIcmp(v1, v2, ssa.IntegerCmpCondUnsignedLessThan, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16LeS, wasm.OpcodeVecI16x8LeS, wasm.OpcodeVecI32x4LeS, wasm.OpcodeVecI64x2LeS: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16LeS: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8LeS: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4LeS: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2LeS: - lane = ssa.VecLaneI64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsVIcmp(v1, v2, ssa.IntegerCmpCondSignedLessThanOrEqual, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16LeU, wasm.OpcodeVecI16x8LeU, wasm.OpcodeVecI32x4LeU: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16LeU: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8LeU: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4LeU: - lane = ssa.VecLaneI32x4 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). 
- AsVIcmp(v1, v2, ssa.IntegerCmpCondUnsignedLessThanOrEqual, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16GtS, wasm.OpcodeVecI16x8GtS, wasm.OpcodeVecI32x4GtS, wasm.OpcodeVecI64x2GtS: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16GtS: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8GtS: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4GtS: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2GtS: - lane = ssa.VecLaneI64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsVIcmp(v1, v2, ssa.IntegerCmpCondSignedGreaterThan, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16GtU, wasm.OpcodeVecI16x8GtU, wasm.OpcodeVecI32x4GtU: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16GtU: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8GtU: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4GtU: - lane = ssa.VecLaneI32x4 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsVIcmp(v1, v2, ssa.IntegerCmpCondUnsignedGreaterThan, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16GeS, wasm.OpcodeVecI16x8GeS, wasm.OpcodeVecI32x4GeS, wasm.OpcodeVecI64x2GeS: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16GeS: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8GeS: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4GeS: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2GeS: - lane = ssa.VecLaneI64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsVIcmp(v1, v2, ssa.IntegerCmpCondSignedGreaterThanOrEqual, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16GeU, wasm.OpcodeVecI16x8GeU, wasm.OpcodeVecI32x4GeU: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16GeU: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8GeU: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4GeU: - lane = ssa.VecLaneI32x4 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). 
- AsVIcmp(v1, v2, ssa.IntegerCmpCondUnsignedGreaterThanOrEqual, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Max, wasm.OpcodeVecF64x2Max: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Max: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Max: - lane = ssa.VecLaneF64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVFmax(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Abs, wasm.OpcodeVecF64x2Abs: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Abs: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Abs: - lane = ssa.VecLaneF64x2 - } - v1 := state.pop() - ret := builder.AllocateInstruction().AsVFabs(v1, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Min, wasm.OpcodeVecF64x2Min: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Min: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Min: - lane = ssa.VecLaneF64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVFmin(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Neg, wasm.OpcodeVecF64x2Neg: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Neg: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Neg: - lane = ssa.VecLaneF64x2 - } - v1 := state.pop() - ret := builder.AllocateInstruction().AsVFneg(v1, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Sqrt, wasm.OpcodeVecF64x2Sqrt: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Sqrt: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Sqrt: - lane = ssa.VecLaneF64x2 - } - v1 := state.pop() - ret := builder.AllocateInstruction().AsVSqrt(v1, lane).Insert(builder).Return() - state.push(ret) - - case wasm.OpcodeVecF32x4Add, wasm.OpcodeVecF64x2Add: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Add: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Add: - lane = ssa.VecLaneF64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVFadd(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Sub, wasm.OpcodeVecF64x2Sub: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Sub: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Sub: - lane = ssa.VecLaneF64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVFsub(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Mul, wasm.OpcodeVecF64x2Mul: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Mul: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Mul: - lane = ssa.VecLaneF64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVFmul(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Div, wasm.OpcodeVecF64x2Div: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Div: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Div: - lane = ssa.VecLaneF64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVFdiv(v1, v2, 
lane).Insert(builder).Return() - state.push(ret) - - case wasm.OpcodeVecI16x8ExtaddPairwiseI8x16S, wasm.OpcodeVecI16x8ExtaddPairwiseI8x16U: - if state.unreachable { - break - } - v := state.pop() - signed := vecOp == wasm.OpcodeVecI16x8ExtaddPairwiseI8x16S - ret := builder.AllocateInstruction().AsExtIaddPairwise(v, ssa.VecLaneI8x16, signed).Insert(builder).Return() - state.push(ret) - - case wasm.OpcodeVecI32x4ExtaddPairwiseI16x8S, wasm.OpcodeVecI32x4ExtaddPairwiseI16x8U: - if state.unreachable { - break - } - v := state.pop() - signed := vecOp == wasm.OpcodeVecI32x4ExtaddPairwiseI16x8S - ret := builder.AllocateInstruction().AsExtIaddPairwise(v, ssa.VecLaneI16x8, signed).Insert(builder).Return() - state.push(ret) - - case wasm.OpcodeVecI16x8ExtMulLowI8x16S, wasm.OpcodeVecI16x8ExtMulLowI8x16U: - if state.unreachable { - break - } - v2 := state.pop() - v1 := state.pop() - ret := c.lowerExtMul( - v1, v2, - ssa.VecLaneI8x16, ssa.VecLaneI16x8, - vecOp == wasm.OpcodeVecI16x8ExtMulLowI8x16S, true) - state.push(ret) - - case wasm.OpcodeVecI16x8ExtMulHighI8x16S, wasm.OpcodeVecI16x8ExtMulHighI8x16U: - if state.unreachable { - break - } - v2 := state.pop() - v1 := state.pop() - ret := c.lowerExtMul( - v1, v2, - ssa.VecLaneI8x16, ssa.VecLaneI16x8, - vecOp == wasm.OpcodeVecI16x8ExtMulHighI8x16S, false) - state.push(ret) - - case wasm.OpcodeVecI32x4ExtMulLowI16x8S, wasm.OpcodeVecI32x4ExtMulLowI16x8U: - if state.unreachable { - break - } - v2 := state.pop() - v1 := state.pop() - ret := c.lowerExtMul( - v1, v2, - ssa.VecLaneI16x8, ssa.VecLaneI32x4, - vecOp == wasm.OpcodeVecI32x4ExtMulLowI16x8S, true) - state.push(ret) - - case wasm.OpcodeVecI32x4ExtMulHighI16x8S, wasm.OpcodeVecI32x4ExtMulHighI16x8U: - if state.unreachable { - break - } - v2 := state.pop() - v1 := state.pop() - ret := c.lowerExtMul( - v1, v2, - ssa.VecLaneI16x8, ssa.VecLaneI32x4, - vecOp == wasm.OpcodeVecI32x4ExtMulHighI16x8S, false) - state.push(ret) - case wasm.OpcodeVecI64x2ExtMulLowI32x4S, wasm.OpcodeVecI64x2ExtMulLowI32x4U: - if state.unreachable { - break - } - v2 := state.pop() - v1 := state.pop() - ret := c.lowerExtMul( - v1, v2, - ssa.VecLaneI32x4, ssa.VecLaneI64x2, - vecOp == wasm.OpcodeVecI64x2ExtMulLowI32x4S, true) - state.push(ret) - - case wasm.OpcodeVecI64x2ExtMulHighI32x4S, wasm.OpcodeVecI64x2ExtMulHighI32x4U: - if state.unreachable { - break - } - v2 := state.pop() - v1 := state.pop() - ret := c.lowerExtMul( - v1, v2, - ssa.VecLaneI32x4, ssa.VecLaneI64x2, - vecOp == wasm.OpcodeVecI64x2ExtMulHighI32x4S, false) - state.push(ret) - - case wasm.OpcodeVecI32x4DotI16x8S: - if state.unreachable { - break - } - v2 := state.pop() - v1 := state.pop() - - ret := builder.AllocateInstruction().AsWideningPairwiseDotProductS(v1, v2).Insert(builder).Return() - state.push(ret) - - case wasm.OpcodeVecF32x4Eq, wasm.OpcodeVecF64x2Eq: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Eq: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Eq: - lane = ssa.VecLaneF64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsVFcmp(v1, v2, ssa.FloatCmpCondEqual, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Ne, wasm.OpcodeVecF64x2Ne: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Ne: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Ne: - lane = ssa.VecLaneF64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). 
- AsVFcmp(v1, v2, ssa.FloatCmpCondNotEqual, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Lt, wasm.OpcodeVecF64x2Lt: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Lt: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Lt: - lane = ssa.VecLaneF64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsVFcmp(v1, v2, ssa.FloatCmpCondLessThan, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Le, wasm.OpcodeVecF64x2Le: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Le: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Le: - lane = ssa.VecLaneF64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsVFcmp(v1, v2, ssa.FloatCmpCondLessThanOrEqual, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Gt, wasm.OpcodeVecF64x2Gt: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Gt: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Gt: - lane = ssa.VecLaneF64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsVFcmp(v1, v2, ssa.FloatCmpCondGreaterThan, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Ge, wasm.OpcodeVecF64x2Ge: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Ge: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Ge: - lane = ssa.VecLaneF64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsVFcmp(v1, v2, ssa.FloatCmpCondGreaterThanOrEqual, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Ceil, wasm.OpcodeVecF64x2Ceil: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Ceil: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Ceil: - lane = ssa.VecLaneF64x2 - } - v1 := state.pop() - ret := builder.AllocateInstruction().AsVCeil(v1, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Floor, wasm.OpcodeVecF64x2Floor: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Floor: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Floor: - lane = ssa.VecLaneF64x2 - } - v1 := state.pop() - ret := builder.AllocateInstruction().AsVFloor(v1, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Trunc, wasm.OpcodeVecF64x2Trunc: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Trunc: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Trunc: - lane = ssa.VecLaneF64x2 - } - v1 := state.pop() - ret := builder.AllocateInstruction().AsVTrunc(v1, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Nearest, wasm.OpcodeVecF64x2Nearest: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Nearest: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Nearest: - lane = ssa.VecLaneF64x2 - } - v1 := state.pop() - ret := builder.AllocateInstruction().AsVNearest(v1, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Pmin, wasm.OpcodeVecF64x2Pmin: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Pmin: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Pmin: - lane = 
ssa.VecLaneF64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVMinPseudo(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4Pmax, wasm.OpcodeVecF64x2Pmax: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecF32x4Pmax: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Pmax: - lane = ssa.VecLaneF64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVMaxPseudo(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI32x4TruncSatF32x4S, wasm.OpcodeVecI32x4TruncSatF32x4U: - if state.unreachable { - break - } - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsVFcvtToIntSat(v1, ssa.VecLaneF32x4, vecOp == wasm.OpcodeVecI32x4TruncSatF32x4S).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI32x4TruncSatF64x2SZero, wasm.OpcodeVecI32x4TruncSatF64x2UZero: - if state.unreachable { - break - } - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsVFcvtToIntSat(v1, ssa.VecLaneF64x2, vecOp == wasm.OpcodeVecI32x4TruncSatF64x2SZero).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4ConvertI32x4S, wasm.OpcodeVecF32x4ConvertI32x4U: - if state.unreachable { - break - } - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsVFcvtFromInt(v1, ssa.VecLaneF32x4, vecOp == wasm.OpcodeVecF32x4ConvertI32x4S).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF64x2ConvertLowI32x4S, wasm.OpcodeVecF64x2ConvertLowI32x4U: - if state.unreachable { - break - } - v1 := state.pop() - if runtime.GOARCH == "arm64" { - // TODO: this is weird. fix. - v1 = builder.AllocateInstruction(). - AsWiden(v1, ssa.VecLaneI32x4, vecOp == wasm.OpcodeVecF64x2ConvertLowI32x4S, true).Insert(builder).Return() - } - ret := builder.AllocateInstruction(). - AsVFcvtFromInt(v1, ssa.VecLaneF64x2, vecOp == wasm.OpcodeVecF64x2ConvertLowI32x4S). - Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16NarrowI16x8S, wasm.OpcodeVecI8x16NarrowI16x8U: - if state.unreachable { - break - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsNarrow(v1, v2, ssa.VecLaneI16x8, vecOp == wasm.OpcodeVecI8x16NarrowI16x8S). - Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI16x8NarrowI32x4S, wasm.OpcodeVecI16x8NarrowI32x4U: - if state.unreachable { - break - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsNarrow(v1, v2, ssa.VecLaneI32x4, vecOp == wasm.OpcodeVecI16x8NarrowI32x4S). - Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI16x8ExtendLowI8x16S, wasm.OpcodeVecI16x8ExtendLowI8x16U: - if state.unreachable { - break - } - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsWiden(v1, ssa.VecLaneI8x16, vecOp == wasm.OpcodeVecI16x8ExtendLowI8x16S, true). - Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI16x8ExtendHighI8x16S, wasm.OpcodeVecI16x8ExtendHighI8x16U: - if state.unreachable { - break - } - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsWiden(v1, ssa.VecLaneI8x16, vecOp == wasm.OpcodeVecI16x8ExtendHighI8x16S, false). - Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI32x4ExtendLowI16x8S, wasm.OpcodeVecI32x4ExtendLowI16x8U: - if state.unreachable { - break - } - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsWiden(v1, ssa.VecLaneI16x8, vecOp == wasm.OpcodeVecI32x4ExtendLowI16x8S, true). 
- Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI32x4ExtendHighI16x8S, wasm.OpcodeVecI32x4ExtendHighI16x8U: - if state.unreachable { - break - } - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsWiden(v1, ssa.VecLaneI16x8, vecOp == wasm.OpcodeVecI32x4ExtendHighI16x8S, false). - Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI64x2ExtendLowI32x4S, wasm.OpcodeVecI64x2ExtendLowI32x4U: - if state.unreachable { - break - } - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsWiden(v1, ssa.VecLaneI32x4, vecOp == wasm.OpcodeVecI64x2ExtendLowI32x4S, true). - Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI64x2ExtendHighI32x4S, wasm.OpcodeVecI64x2ExtendHighI32x4U: - if state.unreachable { - break - } - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsWiden(v1, ssa.VecLaneI32x4, vecOp == wasm.OpcodeVecI64x2ExtendHighI32x4S, false). - Insert(builder).Return() - state.push(ret) - - case wasm.OpcodeVecF64x2PromoteLowF32x4Zero: - if state.unreachable { - break - } - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsFvpromoteLow(v1, ssa.VecLaneF32x4). - Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecF32x4DemoteF64x2Zero: - if state.unreachable { - break - } - v1 := state.pop() - ret := builder.AllocateInstruction(). - AsFvdemote(v1, ssa.VecLaneF64x2). - Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16Shl, wasm.OpcodeVecI16x8Shl, wasm.OpcodeVecI32x4Shl, wasm.OpcodeVecI64x2Shl: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16Shl: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8Shl: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4Shl: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2Shl: - lane = ssa.VecLaneI64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVIshl(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16ShrS, wasm.OpcodeVecI16x8ShrS, wasm.OpcodeVecI32x4ShrS, wasm.OpcodeVecI64x2ShrS: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16ShrS: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8ShrS: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4ShrS: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2ShrS: - lane = ssa.VecLaneI64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVSshr(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16ShrU, wasm.OpcodeVecI16x8ShrU, wasm.OpcodeVecI32x4ShrU, wasm.OpcodeVecI64x2ShrU: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16ShrU: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8ShrU: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4ShrU: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2ShrU: - lane = ssa.VecLaneI64x2 - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsVUshr(v1, v2, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecI8x16ExtractLaneS, wasm.OpcodeVecI16x8ExtractLaneS: - state.pc++ - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16ExtractLaneS: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8ExtractLaneS: - lane = ssa.VecLaneI16x8 - } - v1 := state.pop() - index := c.wasmFunctionBody[state.pc] - ext := builder.AllocateInstruction().AsExtractlane(v1, index, lane, 
true).Insert(builder).Return() - state.push(ext) - case wasm.OpcodeVecI8x16ExtractLaneU, wasm.OpcodeVecI16x8ExtractLaneU, - wasm.OpcodeVecI32x4ExtractLane, wasm.OpcodeVecI64x2ExtractLane, - wasm.OpcodeVecF32x4ExtractLane, wasm.OpcodeVecF64x2ExtractLane: - state.pc++ // Skip the immediate value. - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16ExtractLaneU: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8ExtractLaneU: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4ExtractLane: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2ExtractLane: - lane = ssa.VecLaneI64x2 - case wasm.OpcodeVecF32x4ExtractLane: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2ExtractLane: - lane = ssa.VecLaneF64x2 - } - v1 := state.pop() - index := c.wasmFunctionBody[state.pc] - ext := builder.AllocateInstruction().AsExtractlane(v1, index, lane, false).Insert(builder).Return() - state.push(ext) - case wasm.OpcodeVecI8x16ReplaceLane, wasm.OpcodeVecI16x8ReplaceLane, - wasm.OpcodeVecI32x4ReplaceLane, wasm.OpcodeVecI64x2ReplaceLane, - wasm.OpcodeVecF32x4ReplaceLane, wasm.OpcodeVecF64x2ReplaceLane: - state.pc++ - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16ReplaceLane: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8ReplaceLane: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4ReplaceLane: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2ReplaceLane: - lane = ssa.VecLaneI64x2 - case wasm.OpcodeVecF32x4ReplaceLane: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2ReplaceLane: - lane = ssa.VecLaneF64x2 - } - v2 := state.pop() - v1 := state.pop() - index := c.wasmFunctionBody[state.pc] - ret := builder.AllocateInstruction().AsInsertlane(v1, v2, index, lane).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeVecV128i8x16Shuffle: - state.pc++ - laneIndexes := c.wasmFunctionBody[state.pc : state.pc+16] - state.pc += 15 - if state.unreachable { - break - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsShuffle(v1, v2, laneIndexes).Insert(builder).Return() - state.push(ret) - - case wasm.OpcodeVecI8x16Swizzle: - if state.unreachable { - break - } - v2 := state.pop() - v1 := state.pop() - ret := builder.AllocateInstruction().AsSwizzle(v1, v2, ssa.VecLaneI8x16).Insert(builder).Return() - state.push(ret) - - case wasm.OpcodeVecI8x16Splat, - wasm.OpcodeVecI16x8Splat, - wasm.OpcodeVecI32x4Splat, - wasm.OpcodeVecI64x2Splat, - wasm.OpcodeVecF32x4Splat, - wasm.OpcodeVecF64x2Splat: - if state.unreachable { - break - } - var lane ssa.VecLane - switch vecOp { - case wasm.OpcodeVecI8x16Splat: - lane = ssa.VecLaneI8x16 - case wasm.OpcodeVecI16x8Splat: - lane = ssa.VecLaneI16x8 - case wasm.OpcodeVecI32x4Splat: - lane = ssa.VecLaneI32x4 - case wasm.OpcodeVecI64x2Splat: - lane = ssa.VecLaneI64x2 - case wasm.OpcodeVecF32x4Splat: - lane = ssa.VecLaneF32x4 - case wasm.OpcodeVecF64x2Splat: - lane = ssa.VecLaneF64x2 - } - v1 := state.pop() - ret := builder.AllocateInstruction().AsSplat(v1, lane).Insert(builder).Return() - state.push(ret) - - default: - panic("TODO: unsupported vector instruction: " + wasm.VectorInstructionName(vecOp)) - } - case wasm.OpcodeAtomicPrefix: - state.pc++ - atomicOp := c.wasmFunctionBody[state.pc] - switch atomicOp { - case wasm.OpcodeAtomicMemoryWait32, wasm.OpcodeAtomicMemoryWait64: - _, offset := c.readMemArg() - if state.unreachable { - break - } - - c.storeCallerModuleContext() - - var opSize uint64 - var trampoline wazevoapi.Offset - var 
sig *ssa.Signature - switch atomicOp { - case wasm.OpcodeAtomicMemoryWait32: - opSize = 4 - trampoline = wazevoapi.ExecutionContextOffsetMemoryWait32TrampolineAddress - sig = &c.memoryWait32Sig - case wasm.OpcodeAtomicMemoryWait64: - opSize = 8 - trampoline = wazevoapi.ExecutionContextOffsetMemoryWait64TrampolineAddress - sig = &c.memoryWait64Sig - } - - timeout := state.pop() - exp := state.pop() - baseAddr := state.pop() - addr := c.atomicMemOpSetup(baseAddr, uint64(offset), opSize) - - memoryWaitPtr := builder.AllocateInstruction(). - AsLoad(c.execCtxPtrValue, - trampoline.U32(), - ssa.TypeI64, - ).Insert(builder).Return() - - args := c.allocateVarLengthValues(3, c.execCtxPtrValue, timeout, exp, addr) - memoryWaitRet := builder.AllocateInstruction(). - AsCallIndirect(memoryWaitPtr, sig, args). - Insert(builder).Return() - state.push(memoryWaitRet) - case wasm.OpcodeAtomicMemoryNotify: - _, offset := c.readMemArg() - if state.unreachable { - break - } - - c.storeCallerModuleContext() - count := state.pop() - baseAddr := state.pop() - addr := c.atomicMemOpSetup(baseAddr, uint64(offset), 4) - - memoryNotifyPtr := builder.AllocateInstruction(). - AsLoad(c.execCtxPtrValue, - wazevoapi.ExecutionContextOffsetMemoryNotifyTrampolineAddress.U32(), - ssa.TypeI64, - ).Insert(builder).Return() - args := c.allocateVarLengthValues(2, c.execCtxPtrValue, count, addr) - memoryNotifyRet := builder.AllocateInstruction(). - AsCallIndirect(memoryNotifyPtr, &c.memoryNotifySig, args). - Insert(builder).Return() - state.push(memoryNotifyRet) - case wasm.OpcodeAtomicI32Load, wasm.OpcodeAtomicI64Load, wasm.OpcodeAtomicI32Load8U, wasm.OpcodeAtomicI32Load16U, wasm.OpcodeAtomicI64Load8U, wasm.OpcodeAtomicI64Load16U, wasm.OpcodeAtomicI64Load32U: - _, offset := c.readMemArg() - if state.unreachable { - break - } - - baseAddr := state.pop() - - var size uint64 - switch atomicOp { - case wasm.OpcodeAtomicI64Load: - size = 8 - case wasm.OpcodeAtomicI32Load, wasm.OpcodeAtomicI64Load32U: - size = 4 - case wasm.OpcodeAtomicI32Load16U, wasm.OpcodeAtomicI64Load16U: - size = 2 - case wasm.OpcodeAtomicI32Load8U, wasm.OpcodeAtomicI64Load8U: - size = 1 - } - - var typ ssa.Type - switch atomicOp { - case wasm.OpcodeAtomicI64Load, wasm.OpcodeAtomicI64Load32U, wasm.OpcodeAtomicI64Load16U, wasm.OpcodeAtomicI64Load8U: - typ = ssa.TypeI64 - case wasm.OpcodeAtomicI32Load, wasm.OpcodeAtomicI32Load16U, wasm.OpcodeAtomicI32Load8U: - typ = ssa.TypeI32 - } - - addr := c.atomicMemOpSetup(baseAddr, uint64(offset), size) - res := builder.AllocateInstruction().AsAtomicLoad(addr, size, typ).Insert(builder).Return() - state.push(res) - case wasm.OpcodeAtomicI32Store, wasm.OpcodeAtomicI64Store, wasm.OpcodeAtomicI32Store8, wasm.OpcodeAtomicI32Store16, wasm.OpcodeAtomicI64Store8, wasm.OpcodeAtomicI64Store16, wasm.OpcodeAtomicI64Store32: - _, offset := c.readMemArg() - if state.unreachable { - break - } - - val := state.pop() - baseAddr := state.pop() - - var size uint64 - switch atomicOp { - case wasm.OpcodeAtomicI64Store: - size = 8 - case wasm.OpcodeAtomicI32Store, wasm.OpcodeAtomicI64Store32: - size = 4 - case wasm.OpcodeAtomicI32Store16, wasm.OpcodeAtomicI64Store16: - size = 2 - case wasm.OpcodeAtomicI32Store8, wasm.OpcodeAtomicI64Store8: - size = 1 - } - - addr := c.atomicMemOpSetup(baseAddr, uint64(offset), size) - builder.AllocateInstruction().AsAtomicStore(addr, val, size).Insert(builder) - case wasm.OpcodeAtomicI32RmwAdd, wasm.OpcodeAtomicI64RmwAdd, wasm.OpcodeAtomicI32Rmw8AddU, wasm.OpcodeAtomicI32Rmw16AddU, wasm.OpcodeAtomicI64Rmw8AddU, 
wasm.OpcodeAtomicI64Rmw16AddU, wasm.OpcodeAtomicI64Rmw32AddU, - wasm.OpcodeAtomicI32RmwSub, wasm.OpcodeAtomicI64RmwSub, wasm.OpcodeAtomicI32Rmw8SubU, wasm.OpcodeAtomicI32Rmw16SubU, wasm.OpcodeAtomicI64Rmw8SubU, wasm.OpcodeAtomicI64Rmw16SubU, wasm.OpcodeAtomicI64Rmw32SubU, - wasm.OpcodeAtomicI32RmwAnd, wasm.OpcodeAtomicI64RmwAnd, wasm.OpcodeAtomicI32Rmw8AndU, wasm.OpcodeAtomicI32Rmw16AndU, wasm.OpcodeAtomicI64Rmw8AndU, wasm.OpcodeAtomicI64Rmw16AndU, wasm.OpcodeAtomicI64Rmw32AndU, - wasm.OpcodeAtomicI32RmwOr, wasm.OpcodeAtomicI64RmwOr, wasm.OpcodeAtomicI32Rmw8OrU, wasm.OpcodeAtomicI32Rmw16OrU, wasm.OpcodeAtomicI64Rmw8OrU, wasm.OpcodeAtomicI64Rmw16OrU, wasm.OpcodeAtomicI64Rmw32OrU, - wasm.OpcodeAtomicI32RmwXor, wasm.OpcodeAtomicI64RmwXor, wasm.OpcodeAtomicI32Rmw8XorU, wasm.OpcodeAtomicI32Rmw16XorU, wasm.OpcodeAtomicI64Rmw8XorU, wasm.OpcodeAtomicI64Rmw16XorU, wasm.OpcodeAtomicI64Rmw32XorU, - wasm.OpcodeAtomicI32RmwXchg, wasm.OpcodeAtomicI64RmwXchg, wasm.OpcodeAtomicI32Rmw8XchgU, wasm.OpcodeAtomicI32Rmw16XchgU, wasm.OpcodeAtomicI64Rmw8XchgU, wasm.OpcodeAtomicI64Rmw16XchgU, wasm.OpcodeAtomicI64Rmw32XchgU: - _, offset := c.readMemArg() - if state.unreachable { - break - } - - val := state.pop() - baseAddr := state.pop() - - var rmwOp ssa.AtomicRmwOp - var size uint64 - switch atomicOp { - case wasm.OpcodeAtomicI32RmwAdd, wasm.OpcodeAtomicI64RmwAdd, wasm.OpcodeAtomicI32Rmw8AddU, wasm.OpcodeAtomicI32Rmw16AddU, wasm.OpcodeAtomicI64Rmw8AddU, wasm.OpcodeAtomicI64Rmw16AddU, wasm.OpcodeAtomicI64Rmw32AddU: - rmwOp = ssa.AtomicRmwOpAdd - switch atomicOp { - case wasm.OpcodeAtomicI64RmwAdd: - size = 8 - case wasm.OpcodeAtomicI32RmwAdd, wasm.OpcodeAtomicI64Rmw32AddU: - size = 4 - case wasm.OpcodeAtomicI32Rmw16AddU, wasm.OpcodeAtomicI64Rmw16AddU: - size = 2 - case wasm.OpcodeAtomicI32Rmw8AddU, wasm.OpcodeAtomicI64Rmw8AddU: - size = 1 - } - case wasm.OpcodeAtomicI32RmwSub, wasm.OpcodeAtomicI64RmwSub, wasm.OpcodeAtomicI32Rmw8SubU, wasm.OpcodeAtomicI32Rmw16SubU, wasm.OpcodeAtomicI64Rmw8SubU, wasm.OpcodeAtomicI64Rmw16SubU, wasm.OpcodeAtomicI64Rmw32SubU: - rmwOp = ssa.AtomicRmwOpSub - switch atomicOp { - case wasm.OpcodeAtomicI64RmwSub: - size = 8 - case wasm.OpcodeAtomicI32RmwSub, wasm.OpcodeAtomicI64Rmw32SubU: - size = 4 - case wasm.OpcodeAtomicI32Rmw16SubU, wasm.OpcodeAtomicI64Rmw16SubU: - size = 2 - case wasm.OpcodeAtomicI32Rmw8SubU, wasm.OpcodeAtomicI64Rmw8SubU: - size = 1 - } - case wasm.OpcodeAtomicI32RmwAnd, wasm.OpcodeAtomicI64RmwAnd, wasm.OpcodeAtomicI32Rmw8AndU, wasm.OpcodeAtomicI32Rmw16AndU, wasm.OpcodeAtomicI64Rmw8AndU, wasm.OpcodeAtomicI64Rmw16AndU, wasm.OpcodeAtomicI64Rmw32AndU: - rmwOp = ssa.AtomicRmwOpAnd - switch atomicOp { - case wasm.OpcodeAtomicI64RmwAnd: - size = 8 - case wasm.OpcodeAtomicI32RmwAnd, wasm.OpcodeAtomicI64Rmw32AndU: - size = 4 - case wasm.OpcodeAtomicI32Rmw16AndU, wasm.OpcodeAtomicI64Rmw16AndU: - size = 2 - case wasm.OpcodeAtomicI32Rmw8AndU, wasm.OpcodeAtomicI64Rmw8AndU: - size = 1 - } - case wasm.OpcodeAtomicI32RmwOr, wasm.OpcodeAtomicI64RmwOr, wasm.OpcodeAtomicI32Rmw8OrU, wasm.OpcodeAtomicI32Rmw16OrU, wasm.OpcodeAtomicI64Rmw8OrU, wasm.OpcodeAtomicI64Rmw16OrU, wasm.OpcodeAtomicI64Rmw32OrU: - rmwOp = ssa.AtomicRmwOpOr - switch atomicOp { - case wasm.OpcodeAtomicI64RmwOr: - size = 8 - case wasm.OpcodeAtomicI32RmwOr, wasm.OpcodeAtomicI64Rmw32OrU: - size = 4 - case wasm.OpcodeAtomicI32Rmw16OrU, wasm.OpcodeAtomicI64Rmw16OrU: - size = 2 - case wasm.OpcodeAtomicI32Rmw8OrU, wasm.OpcodeAtomicI64Rmw8OrU: - size = 1 - } - case wasm.OpcodeAtomicI32RmwXor, wasm.OpcodeAtomicI64RmwXor, 
wasm.OpcodeAtomicI32Rmw8XorU, wasm.OpcodeAtomicI32Rmw16XorU, wasm.OpcodeAtomicI64Rmw8XorU, wasm.OpcodeAtomicI64Rmw16XorU, wasm.OpcodeAtomicI64Rmw32XorU: - rmwOp = ssa.AtomicRmwOpXor - switch atomicOp { - case wasm.OpcodeAtomicI64RmwXor: - size = 8 - case wasm.OpcodeAtomicI32RmwXor, wasm.OpcodeAtomicI64Rmw32XorU: - size = 4 - case wasm.OpcodeAtomicI32Rmw16XorU, wasm.OpcodeAtomicI64Rmw16XorU: - size = 2 - case wasm.OpcodeAtomicI32Rmw8XorU, wasm.OpcodeAtomicI64Rmw8XorU: - size = 1 - } - case wasm.OpcodeAtomicI32RmwXchg, wasm.OpcodeAtomicI64RmwXchg, wasm.OpcodeAtomicI32Rmw8XchgU, wasm.OpcodeAtomicI32Rmw16XchgU, wasm.OpcodeAtomicI64Rmw8XchgU, wasm.OpcodeAtomicI64Rmw16XchgU, wasm.OpcodeAtomicI64Rmw32XchgU: - rmwOp = ssa.AtomicRmwOpXchg - switch atomicOp { - case wasm.OpcodeAtomicI64RmwXchg: - size = 8 - case wasm.OpcodeAtomicI32RmwXchg, wasm.OpcodeAtomicI64Rmw32XchgU: - size = 4 - case wasm.OpcodeAtomicI32Rmw16XchgU, wasm.OpcodeAtomicI64Rmw16XchgU: - size = 2 - case wasm.OpcodeAtomicI32Rmw8XchgU, wasm.OpcodeAtomicI64Rmw8XchgU: - size = 1 - } - } - - addr := c.atomicMemOpSetup(baseAddr, uint64(offset), size) - res := builder.AllocateInstruction().AsAtomicRmw(rmwOp, addr, val, size).Insert(builder).Return() - state.push(res) - case wasm.OpcodeAtomicI32RmwCmpxchg, wasm.OpcodeAtomicI64RmwCmpxchg, wasm.OpcodeAtomicI32Rmw8CmpxchgU, wasm.OpcodeAtomicI32Rmw16CmpxchgU, wasm.OpcodeAtomicI64Rmw8CmpxchgU, wasm.OpcodeAtomicI64Rmw16CmpxchgU, wasm.OpcodeAtomicI64Rmw32CmpxchgU: - _, offset := c.readMemArg() - if state.unreachable { - break - } - - repl := state.pop() - exp := state.pop() - baseAddr := state.pop() - - var size uint64 - switch atomicOp { - case wasm.OpcodeAtomicI64RmwCmpxchg: - size = 8 - case wasm.OpcodeAtomicI32RmwCmpxchg, wasm.OpcodeAtomicI64Rmw32CmpxchgU: - size = 4 - case wasm.OpcodeAtomicI32Rmw16CmpxchgU, wasm.OpcodeAtomicI64Rmw16CmpxchgU: - size = 2 - case wasm.OpcodeAtomicI32Rmw8CmpxchgU, wasm.OpcodeAtomicI64Rmw8CmpxchgU: - size = 1 - } - addr := c.atomicMemOpSetup(baseAddr, uint64(offset), size) - res := builder.AllocateInstruction().AsAtomicCas(addr, exp, repl, size).Insert(builder).Return() - state.push(res) - case wasm.OpcodeAtomicFence: - order := c.readByte() - if state.unreachable { - break - } - if c.needMemory { - builder.AllocateInstruction().AsFence(order).Insert(builder) - } - default: - panic("TODO: unsupported atomic instruction: " + wasm.AtomicInstructionName(atomicOp)) - } - case wasm.OpcodeRefFunc: - funcIndex := c.readI32u() - if state.unreachable { - break - } - - c.storeCallerModuleContext() - - funcIndexVal := builder.AllocateInstruction().AsIconst32(funcIndex).Insert(builder).Return() - - refFuncPtr := builder.AllocateInstruction(). - AsLoad(c.execCtxPtrValue, - wazevoapi.ExecutionContextOffsetRefFuncTrampolineAddress.U32(), - ssa.TypeI64, - ).Insert(builder).Return() - - args := c.allocateVarLengthValues(2, c.execCtxPtrValue, funcIndexVal) - refFuncRet := builder. - AllocateInstruction(). - AsCallIndirect(refFuncPtr, &c.refFuncSig, args). - Insert(builder).Return() - state.push(refFuncRet) - - case wasm.OpcodeRefNull: - c.loweringState.pc++ // skips the reference type as we treat both of them as i64(0). - if state.unreachable { - break - } - ret := builder.AllocateInstruction().AsIconst64(0).Insert(builder).Return() - state.push(ret) - case wasm.OpcodeRefIsNull: - if state.unreachable { - break - } - r := state.pop() - zero := builder.AllocateInstruction().AsIconst64(0).Insert(builder) - icmp := builder.AllocateInstruction(). 
- AsIcmp(r, zero.Return(), ssa.IntegerCmpCondEqual). - Insert(builder). - Return() - state.push(icmp) - case wasm.OpcodeTableSet: - tableIndex := c.readI32u() - if state.unreachable { - break - } - r := state.pop() - targetOffsetInTable := state.pop() - - elementAddr := c.lowerAccessTableWithBoundsCheck(tableIndex, targetOffsetInTable) - builder.AllocateInstruction().AsStore(ssa.OpcodeStore, r, elementAddr, 0).Insert(builder) - - case wasm.OpcodeTableGet: - tableIndex := c.readI32u() - if state.unreachable { - break - } - targetOffsetInTable := state.pop() - elementAddr := c.lowerAccessTableWithBoundsCheck(tableIndex, targetOffsetInTable) - loaded := builder.AllocateInstruction().AsLoad(elementAddr, 0, ssa.TypeI64).Insert(builder).Return() - state.push(loaded) - default: - panic("TODO: unsupported in wazevo yet: " + wasm.InstructionName(op)) - } - - if wazevoapi.FrontEndLoggingEnabled { - fmt.Println("--------- Translated " + wasm.InstructionName(op) + " --------") - fmt.Println("state: " + c.loweringState.String()) - fmt.Println(c.formatBuilder()) - fmt.Println("--------------------------") - } - c.loweringState.pc++ -} - -func (c *Compiler) lowerExtMul(v1, v2 ssa.Value, from, to ssa.VecLane, signed, low bool) ssa.Value { - // TODO: The sequence `Widen; Widen; VIMul` can be substituted for a single instruction on some ISAs. - builder := c.ssaBuilder - - v1lo := builder.AllocateInstruction().AsWiden(v1, from, signed, low).Insert(builder).Return() - v2lo := builder.AllocateInstruction().AsWiden(v2, from, signed, low).Insert(builder).Return() - - return builder.AllocateInstruction().AsVImul(v1lo, v2lo, to).Insert(builder).Return() -} - -const ( - tableInstanceBaseAddressOffset = 0 - tableInstanceLenOffset = tableInstanceBaseAddressOffset + 8 -) - -func (c *Compiler) lowerAccessTableWithBoundsCheck(tableIndex uint32, elementOffsetInTable ssa.Value) (elementAddress ssa.Value) { - builder := c.ssaBuilder - - // Load the table. - loadTableInstancePtr := builder.AllocateInstruction() - loadTableInstancePtr.AsLoad(c.moduleCtxPtrValue, c.offset.TableOffset(int(tableIndex)).U32(), ssa.TypeI64) - builder.InsertInstruction(loadTableInstancePtr) - tableInstancePtr := loadTableInstancePtr.Return() - - // Load the table's length. - loadTableLen := builder.AllocateInstruction() - loadTableLen.AsLoad(tableInstancePtr, tableInstanceLenOffset, ssa.TypeI32) - builder.InsertInstruction(loadTableLen) - tableLen := loadTableLen.Return() - - // Compare the length and the target, and trap if out of bounds. - checkOOB := builder.AllocateInstruction() - checkOOB.AsIcmp(elementOffsetInTable, tableLen, ssa.IntegerCmpCondUnsignedGreaterThanOrEqual) - builder.InsertInstruction(checkOOB) - exitIfOOB := builder.AllocateInstruction() - exitIfOOB.AsExitIfTrueWithCode(c.execCtxPtrValue, checkOOB.Return(), wazevoapi.ExitCodeTableOutOfBounds) - builder.InsertInstruction(exitIfOOB) - - // Get the base address of wasm.TableInstance.References. - loadTableBaseAddress := builder.AllocateInstruction() - loadTableBaseAddress.AsLoad(tableInstancePtr, tableInstanceBaseAddressOffset, ssa.TypeI64) - builder.InsertInstruction(loadTableBaseAddress) - tableBase := loadTableBaseAddress.Return() - - // Calculate the address of the target function. First we need to multiply targetOffsetInTable by 8 (pointer size). 
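// The multiplication below is emitted as a left shift by a constant 3
// (elementOffsetInTable << 3): each table slot is a single 8-byte pointer to a
// functionInstance, so e.g. table index 5 maps to byte offset 40.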
- multiplyBy8 := builder.AllocateInstruction() - three := builder.AllocateInstruction() - three.AsIconst64(3) - builder.InsertInstruction(three) - multiplyBy8.AsIshl(elementOffsetInTable, three.Return()) - builder.InsertInstruction(multiplyBy8) - targetOffsetInTableMultipliedBy8 := multiplyBy8.Return() - - // Then add the multiplied value to the base which results in the address of the target function (*wazevo.functionInstance) - calcElementAddressInTable := builder.AllocateInstruction() - calcElementAddressInTable.AsIadd(tableBase, targetOffsetInTableMultipliedBy8) - builder.InsertInstruction(calcElementAddressInTable) - return calcElementAddressInTable.Return() -} - -func (c *Compiler) lowerCallIndirect(typeIndex, tableIndex uint32) { - builder := c.ssaBuilder - state := c.state() - - elementOffsetInTable := state.pop() - functionInstancePtrAddress := c.lowerAccessTableWithBoundsCheck(tableIndex, elementOffsetInTable) - loadFunctionInstancePtr := builder.AllocateInstruction() - loadFunctionInstancePtr.AsLoad(functionInstancePtrAddress, 0, ssa.TypeI64) - builder.InsertInstruction(loadFunctionInstancePtr) - functionInstancePtr := loadFunctionInstancePtr.Return() - - // Check if it is not the null pointer. - zero := builder.AllocateInstruction() - zero.AsIconst64(0) - builder.InsertInstruction(zero) - checkNull := builder.AllocateInstruction() - checkNull.AsIcmp(functionInstancePtr, zero.Return(), ssa.IntegerCmpCondEqual) - builder.InsertInstruction(checkNull) - exitIfNull := builder.AllocateInstruction() - exitIfNull.AsExitIfTrueWithCode(c.execCtxPtrValue, checkNull.Return(), wazevoapi.ExitCodeIndirectCallNullPointer) - builder.InsertInstruction(exitIfNull) - - // We need to do the type check. First, load the target function instance's typeID. - loadTypeID := builder.AllocateInstruction() - loadTypeID.AsLoad(functionInstancePtr, wazevoapi.FunctionInstanceTypeIDOffset, ssa.TypeI32) - builder.InsertInstruction(loadTypeID) - actualTypeID := loadTypeID.Return() - - // Next, we load the expected TypeID: - loadTypeIDsBegin := builder.AllocateInstruction() - loadTypeIDsBegin.AsLoad(c.moduleCtxPtrValue, c.offset.TypeIDs1stElement.U32(), ssa.TypeI64) - builder.InsertInstruction(loadTypeIDsBegin) - typeIDsBegin := loadTypeIDsBegin.Return() - - loadExpectedTypeID := builder.AllocateInstruction() - loadExpectedTypeID.AsLoad(typeIDsBegin, uint32(typeIndex)*4 /* size of wasm.FunctionTypeID */, ssa.TypeI32) - builder.InsertInstruction(loadExpectedTypeID) - expectedTypeID := loadExpectedTypeID.Return() - - // Check if the type ID matches. - checkTypeID := builder.AllocateInstruction() - checkTypeID.AsIcmp(actualTypeID, expectedTypeID, ssa.IntegerCmpCondNotEqual) - builder.InsertInstruction(checkTypeID) - exitIfNotMatch := builder.AllocateInstruction() - exitIfNotMatch.AsExitIfTrueWithCode(c.execCtxPtrValue, checkTypeID.Return(), wazevoapi.ExitCodeIndirectCallTypeMismatch) - builder.InsertInstruction(exitIfNotMatch) - - // Now ready to call the function. Load the executable and moduleContextOpaquePtr from the function instance. 
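// What follows loads those two fields from the functionInstance, prepends the execution
// context and the callee's module context to the Wasm arguments popped from the value
// stack, and emits an indirect call through the loaded executable pointer; the call's
// results are pushed back onto the stack and reloadAfterCall re-derives the state
// (memory base/length and mutable globals) the callee may have invalidated.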
- loadExecutablePtr := builder.AllocateInstruction() - loadExecutablePtr.AsLoad(functionInstancePtr, wazevoapi.FunctionInstanceExecutableOffset, ssa.TypeI64) - builder.InsertInstruction(loadExecutablePtr) - executablePtr := loadExecutablePtr.Return() - loadModuleContextOpaquePtr := builder.AllocateInstruction() - loadModuleContextOpaquePtr.AsLoad(functionInstancePtr, wazevoapi.FunctionInstanceModuleContextOpaquePtrOffset, ssa.TypeI64) - builder.InsertInstruction(loadModuleContextOpaquePtr) - moduleContextOpaquePtr := loadModuleContextOpaquePtr.Return() - - typ := &c.m.TypeSection[typeIndex] - tail := len(state.values) - len(typ.Params) - vs := state.values[tail:] - state.values = state.values[:tail] - args := c.allocateVarLengthValues(2+len(vs), c.execCtxPtrValue, moduleContextOpaquePtr) - args = args.Append(builder.VarLengthPool(), vs...) - - // Before transfer the control to the callee, we have to store the current module's moduleContextPtr - // into execContext.callerModuleContextPtr in case when the callee is a Go function. - c.storeCallerModuleContext() - - call := builder.AllocateInstruction() - call.AsCallIndirect(executablePtr, c.signatures[typ], args) - builder.InsertInstruction(call) - - first, rest := call.Returns() - if first.Valid() { - state.push(first) - } - for _, v := range rest { - state.push(v) - } - - c.reloadAfterCall() -} - -// memOpSetup inserts the bounds check and calculates the address of the memory operation (loads/stores). -func (c *Compiler) memOpSetup(baseAddr ssa.Value, constOffset, operationSizeInBytes uint64) (address ssa.Value) { - address = ssa.ValueInvalid - builder := c.ssaBuilder - - baseAddrID := baseAddr.ID() - ceil := constOffset + operationSizeInBytes - if known := c.getKnownSafeBound(baseAddrID); known.valid() { - // We reuse the calculated absolute address even if the bound is not known to be safe. - address = known.absoluteAddr - if ceil <= known.bound { - if !address.Valid() { - // This means that, the bound is known to be safe, but the memory base might have changed. - // So, we re-calculate the address. - memBase := c.getMemoryBaseValue(false) - extBaseAddr := builder.AllocateInstruction(). - AsUExtend(baseAddr, 32, 64). - Insert(builder). - Return() - address = builder.AllocateInstruction(). - AsIadd(memBase, extBaseAddr).Insert(builder).Return() - known.absoluteAddr = address // Update the absolute address for the subsequent memory access. - } - return - } - } - - ceilConst := builder.AllocateInstruction() - ceilConst.AsIconst64(ceil) - builder.InsertInstruction(ceilConst) - - // We calculate the offset in 64-bit space. - extBaseAddr := builder.AllocateInstruction(). - AsUExtend(baseAddr, 32, 64). - Insert(builder). - Return() - - // Note: memLen is already zero extended to 64-bit space at the load time. - memLen := c.getMemoryLenValue(false) - - // baseAddrPlusCeil = baseAddr + ceil - baseAddrPlusCeil := builder.AllocateInstruction() - baseAddrPlusCeil.AsIadd(extBaseAddr, ceilConst.Return()) - builder.InsertInstruction(baseAddrPlusCeil) - - // Check for out of bounds memory access: `memLen >= baseAddrPlusCeil`. - cmp := builder.AllocateInstruction() - cmp.AsIcmp(memLen, baseAddrPlusCeil.Return(), ssa.IntegerCmpCondUnsignedLessThan) - builder.InsertInstruction(cmp) - exitIfNZ := builder.AllocateInstruction() - exitIfNZ.AsExitIfTrueWithCode(c.execCtxPtrValue, cmp.Return(), wazevoapi.ExitCodeMemoryOutOfBounds) - builder.InsertInstruction(exitIfNZ) - - // Load the value from memBase + extBaseAddr. 
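// (Only the absolute address memBase+extBaseAddr is materialized here; the actual
// load/store instruction is emitted by the caller.) The bounds check above traps when
// memLen < extBaseAddr + constOffset + operationSizeInBytes, i.e. whenever the access
// [baseAddr+constOffset, baseAddr+constOffset+size) would not fit inside the memory.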
- if address == ssa.ValueInvalid { // Reuse the value if the memBase is already calculated at this point. - memBase := c.getMemoryBaseValue(false) - address = builder.AllocateInstruction(). - AsIadd(memBase, extBaseAddr).Insert(builder).Return() - } - - // Record the bound ceil for this baseAddr is known to be safe for the subsequent memory access in the same block. - c.recordKnownSafeBound(baseAddrID, ceil, address) - return -} - -// atomicMemOpSetup inserts the bounds check and calculates the address of the memory operation (loads/stores), including -// the constant offset and performs an alignment check on the final address. -func (c *Compiler) atomicMemOpSetup(baseAddr ssa.Value, constOffset, operationSizeInBytes uint64) (address ssa.Value) { - builder := c.ssaBuilder - - addrWithoutOffset := c.memOpSetup(baseAddr, constOffset, operationSizeInBytes) - var addr ssa.Value - if constOffset == 0 { - addr = addrWithoutOffset - } else { - offset := builder.AllocateInstruction().AsIconst64(constOffset).Insert(builder).Return() - addr = builder.AllocateInstruction().AsIadd(addrWithoutOffset, offset).Insert(builder).Return() - } - - c.memAlignmentCheck(addr, operationSizeInBytes) - - return addr -} - -func (c *Compiler) memAlignmentCheck(addr ssa.Value, operationSizeInBytes uint64) { - if operationSizeInBytes == 1 { - return // No alignment restrictions when accessing a byte - } - var checkBits uint64 - switch operationSizeInBytes { - case 2: - checkBits = 0b1 - case 4: - checkBits = 0b11 - case 8: - checkBits = 0b111 - } - - builder := c.ssaBuilder - - mask := builder.AllocateInstruction().AsIconst64(checkBits).Insert(builder).Return() - masked := builder.AllocateInstruction().AsBand(addr, mask).Insert(builder).Return() - zero := builder.AllocateInstruction().AsIconst64(0).Insert(builder).Return() - cmp := builder.AllocateInstruction().AsIcmp(masked, zero, ssa.IntegerCmpCondNotEqual).Insert(builder).Return() - builder.AllocateInstruction().AsExitIfTrueWithCode(c.execCtxPtrValue, cmp, wazevoapi.ExitCodeUnalignedAtomic).Insert(builder) -} - -func (c *Compiler) callMemmove(dst, src, size ssa.Value) { - args := c.allocateVarLengthValues(3, dst, src, size) - if size.Type() != ssa.TypeI64 { - panic("TODO: memmove size must be i64") - } - - builder := c.ssaBuilder - memmovePtr := builder.AllocateInstruction(). - AsLoad(c.execCtxPtrValue, - wazevoapi.ExecutionContextOffsetMemmoveAddress.U32(), - ssa.TypeI64, - ).Insert(builder).Return() - builder.AllocateInstruction().AsCallGoRuntimeMemmove(memmovePtr, &c.memmoveSig, args).Insert(builder) -} - -func (c *Compiler) reloadAfterCall() { - // Note that when these are not used in the following instructions, they will be optimized out. - // So in any ways, we define them! - - // After calling any function, memory buffer might have changed. So we need to re-define the variable. - // However, if the memory is shared, we don't need to reload the memory base and length as the base will never change. - if c.needMemory && !c.memoryShared { - c.reloadMemoryBaseLen() - } - - // Also, any mutable Global can change. - for _, index := range c.mutableGlobalVariablesIndexes { - _ = c.getWasmGlobalValue(index, true) - } -} - -func (c *Compiler) reloadMemoryBaseLen() { - _ = c.getMemoryBaseValue(true) - _ = c.getMemoryLenValue(true) - - // This function being called means that the memory base might have changed. 
- // Therefore, we need to clear the absolute addresses recorded in the known safe bounds - // because we cache the absolute address of the memory access per each base offset. - c.resetAbsoluteAddressInSafeBounds() -} - -func (c *Compiler) setWasmGlobalValue(index wasm.Index, v ssa.Value) { - variable := c.globalVariables[index] - opaqueOffset := c.offset.GlobalInstanceOffset(index) - - builder := c.ssaBuilder - if index < c.m.ImportGlobalCount { - loadGlobalInstPtr := builder.AllocateInstruction() - loadGlobalInstPtr.AsLoad(c.moduleCtxPtrValue, uint32(opaqueOffset), ssa.TypeI64) - builder.InsertInstruction(loadGlobalInstPtr) - - store := builder.AllocateInstruction() - store.AsStore(ssa.OpcodeStore, v, loadGlobalInstPtr.Return(), uint32(0)) - builder.InsertInstruction(store) - - } else { - store := builder.AllocateInstruction() - store.AsStore(ssa.OpcodeStore, v, c.moduleCtxPtrValue, uint32(opaqueOffset)) - builder.InsertInstruction(store) - } - - // The value has changed to `v`, so we record it. - builder.DefineVariableInCurrentBB(variable, v) -} - -func (c *Compiler) getWasmGlobalValue(index wasm.Index, forceLoad bool) ssa.Value { - variable := c.globalVariables[index] - typ := c.globalVariablesTypes[index] - opaqueOffset := c.offset.GlobalInstanceOffset(index) - - builder := c.ssaBuilder - if !forceLoad { - if v := builder.FindValueInLinearPath(variable); v.Valid() { - return v - } - } - - var load *ssa.Instruction - if index < c.m.ImportGlobalCount { - loadGlobalInstPtr := builder.AllocateInstruction() - loadGlobalInstPtr.AsLoad(c.moduleCtxPtrValue, uint32(opaqueOffset), ssa.TypeI64) - builder.InsertInstruction(loadGlobalInstPtr) - load = builder.AllocateInstruction(). - AsLoad(loadGlobalInstPtr.Return(), uint32(0), typ) - } else { - load = builder.AllocateInstruction(). 
- AsLoad(c.moduleCtxPtrValue, uint32(opaqueOffset), typ) - } - - v := load.Insert(builder).Return() - builder.DefineVariableInCurrentBB(variable, v) - return v -} - -const ( - memoryInstanceBufOffset = 0 - memoryInstanceBufSizeOffset = memoryInstanceBufOffset + 8 -) - -func (c *Compiler) getMemoryBaseValue(forceReload bool) ssa.Value { - builder := c.ssaBuilder - variable := c.memoryBaseVariable - if !forceReload { - if v := builder.FindValueInLinearPath(variable); v.Valid() { - return v - } - } - - var ret ssa.Value - if c.offset.LocalMemoryBegin < 0 { - loadMemInstPtr := builder.AllocateInstruction() - loadMemInstPtr.AsLoad(c.moduleCtxPtrValue, c.offset.ImportedMemoryBegin.U32(), ssa.TypeI64) - builder.InsertInstruction(loadMemInstPtr) - memInstPtr := loadMemInstPtr.Return() - - loadBufPtr := builder.AllocateInstruction() - loadBufPtr.AsLoad(memInstPtr, memoryInstanceBufOffset, ssa.TypeI64) - builder.InsertInstruction(loadBufPtr) - ret = loadBufPtr.Return() - } else { - load := builder.AllocateInstruction() - load.AsLoad(c.moduleCtxPtrValue, c.offset.LocalMemoryBase().U32(), ssa.TypeI64) - builder.InsertInstruction(load) - ret = load.Return() - } - - builder.DefineVariableInCurrentBB(variable, ret) - return ret -} - -func (c *Compiler) getMemoryLenValue(forceReload bool) ssa.Value { - variable := c.memoryLenVariable - builder := c.ssaBuilder - if !forceReload && !c.memoryShared { - if v := builder.FindValueInLinearPath(variable); v.Valid() { - return v - } - } - - var ret ssa.Value - if c.offset.LocalMemoryBegin < 0 { - loadMemInstPtr := builder.AllocateInstruction() - loadMemInstPtr.AsLoad(c.moduleCtxPtrValue, c.offset.ImportedMemoryBegin.U32(), ssa.TypeI64) - builder.InsertInstruction(loadMemInstPtr) - memInstPtr := loadMemInstPtr.Return() - - loadBufSizePtr := builder.AllocateInstruction() - if c.memoryShared { - sizeOffset := builder.AllocateInstruction().AsIconst64(memoryInstanceBufSizeOffset).Insert(builder).Return() - addr := builder.AllocateInstruction().AsIadd(memInstPtr, sizeOffset).Insert(builder).Return() - loadBufSizePtr.AsAtomicLoad(addr, 8, ssa.TypeI64) - } else { - loadBufSizePtr.AsLoad(memInstPtr, memoryInstanceBufSizeOffset, ssa.TypeI64) - } - builder.InsertInstruction(loadBufSizePtr) - - ret = loadBufSizePtr.Return() - } else { - load := builder.AllocateInstruction() - if c.memoryShared { - lenOffset := builder.AllocateInstruction().AsIconst64(c.offset.LocalMemoryLen().U64()).Insert(builder).Return() - addr := builder.AllocateInstruction().AsIadd(c.moduleCtxPtrValue, lenOffset).Insert(builder).Return() - load.AsAtomicLoad(addr, 8, ssa.TypeI64) - } else { - load.AsExtLoad(ssa.OpcodeUload32, c.moduleCtxPtrValue, c.offset.LocalMemoryLen().U32(), true) - } - builder.InsertInstruction(load) - ret = load.Return() - } - - builder.DefineVariableInCurrentBB(variable, ret) - return ret -} - -func (c *Compiler) insertIcmp(cond ssa.IntegerCmpCond) { - state, builder := c.state(), c.ssaBuilder - y, x := state.pop(), state.pop() - cmp := builder.AllocateInstruction() - cmp.AsIcmp(x, y, cond) - builder.InsertInstruction(cmp) - value := cmp.Return() - state.push(value) -} - -func (c *Compiler) insertFcmp(cond ssa.FloatCmpCond) { - state, builder := c.state(), c.ssaBuilder - y, x := state.pop(), state.pop() - cmp := builder.AllocateInstruction() - cmp.AsFcmp(x, y, cond) - builder.InsertInstruction(cmp) - value := cmp.Return() - state.push(value) -} - -// storeCallerModuleContext stores the current module's moduleContextPtr into execContext.callerModuleContextPtr. 
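// It must run before any control transfer that may end up in a Go function (listener
// invocations, calls through the runtime trampolines, indirect calls), so the runtime
// can locate the calling module's context from the execution context.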
-func (c *Compiler) storeCallerModuleContext() { - builder := c.ssaBuilder - execCtx := c.execCtxPtrValue - store := builder.AllocateInstruction() - store.AsStore(ssa.OpcodeStore, - c.moduleCtxPtrValue, execCtx, wazevoapi.ExecutionContextOffsetCallerModuleContextPtr.U32()) - builder.InsertInstruction(store) -} - -func (c *Compiler) readByte() byte { - v := c.wasmFunctionBody[c.loweringState.pc+1] - c.loweringState.pc++ - return v -} - -func (c *Compiler) readI32u() uint32 { - v, n, err := leb128.LoadUint32(c.wasmFunctionBody[c.loweringState.pc+1:]) - if err != nil { - panic(err) // shouldn't be reached since compilation comes after validation. - } - c.loweringState.pc += int(n) - return v -} - -func (c *Compiler) readI32s() int32 { - v, n, err := leb128.LoadInt32(c.wasmFunctionBody[c.loweringState.pc+1:]) - if err != nil { - panic(err) // shouldn't be reached since compilation comes after validation. - } - c.loweringState.pc += int(n) - return v -} - -func (c *Compiler) readI64s() int64 { - v, n, err := leb128.LoadInt64(c.wasmFunctionBody[c.loweringState.pc+1:]) - if err != nil { - panic(err) // shouldn't be reached since compilation comes after validation. - } - c.loweringState.pc += int(n) - return v -} - -func (c *Compiler) readF32() float32 { - v := math.Float32frombits(binary.LittleEndian.Uint32(c.wasmFunctionBody[c.loweringState.pc+1:])) - c.loweringState.pc += 4 - return v -} - -func (c *Compiler) readF64() float64 { - v := math.Float64frombits(binary.LittleEndian.Uint64(c.wasmFunctionBody[c.loweringState.pc+1:])) - c.loweringState.pc += 8 - return v -} - -// readBlockType reads the block type from the current position of the bytecode reader. -func (c *Compiler) readBlockType() *wasm.FunctionType { - state := c.state() - - c.br.Reset(c.wasmFunctionBody[state.pc+1:]) - bt, num, err := wasm.DecodeBlockType(c.m.TypeSection, c.br, api.CoreFeaturesV2) - if err != nil { - panic(err) // shouldn't be reached since compilation comes after validation. - } - state.pc += int(num) - - return bt -} - -func (c *Compiler) readMemArg() (align, offset uint32) { - state := c.state() - - align, num, err := leb128.LoadUint32(c.wasmFunctionBody[state.pc+1:]) - if err != nil { - panic(fmt.Errorf("read memory align: %v", err)) - } - - state.pc += int(num) - offset, num, err = leb128.LoadUint32(c.wasmFunctionBody[state.pc+1:]) - if err != nil { - panic(fmt.Errorf("read memory offset: %v", err)) - } - - state.pc += int(num) - return align, offset -} - -// insertJumpToBlock inserts a jump instruction to the given block in the current block. -func (c *Compiler) insertJumpToBlock(args ssa.Values, targetBlk ssa.BasicBlock) { - if targetBlk.ReturnBlock() { - if c.needListener { - c.callListenerAfter() - } - } - - builder := c.ssaBuilder - jmp := builder.AllocateInstruction() - jmp.AsJump(args, targetBlk) - builder.InsertInstruction(jmp) -} - -func (c *Compiler) insertIntegerExtend(signed bool, from, to byte) { - state := c.state() - builder := c.ssaBuilder - v := state.pop() - extend := builder.AllocateInstruction() - if signed { - extend.AsSExtend(v, from, to) - } else { - extend.AsUExtend(v, from, to) - } - builder.InsertInstruction(extend) - value := extend.Return() - state.push(value) -} - -func (c *Compiler) switchTo(originalStackLen int, targetBlk ssa.BasicBlock) { - if targetBlk.Preds() == 0 { - c.loweringState.unreachable = true - } - - // Now we should adjust the stack and start translating the continuation block. 
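// Concretely: the value stack is truncated back to the height recorded when the frame
// was entered, and the target block's parameters are pushed so they form the new top
// of the stack.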
- c.loweringState.values = c.loweringState.values[:originalStackLen] - - c.ssaBuilder.SetCurrentBlock(targetBlk) - - // At this point, blocks params consist only of the Wasm-level parameters, - // (since it's added only when we are trying to resolve variable *inside* this block). - for i := 0; i < targetBlk.Params(); i++ { - value := targetBlk.Param(i) - c.loweringState.push(value) - } -} - -// results returns the number of results of the current function. -func (c *Compiler) results() int { - return len(c.wasmFunctionTyp.Results) -} - -func (c *Compiler) lowerBrTable(labels []uint32, index ssa.Value) { - state := c.state() - builder := c.ssaBuilder - - f := state.ctrlPeekAt(int(labels[0])) - var numArgs int - if f.isLoop() { - numArgs = len(f.blockType.Params) - } else { - numArgs = len(f.blockType.Results) - } - - varPool := builder.VarLengthPool() - trampolineBlockIDs := varPool.Allocate(len(labels)) - - // We need trampoline blocks since depending on the target block structure, we might end up inserting moves before jumps, - // which cannot be done with br_table. Instead, we can do such per-block moves in the trampoline blocks. - // At the linking phase (very end of the backend), we can remove the unnecessary jumps, and therefore no runtime overhead. - currentBlk := builder.CurrentBlock() - for _, l := range labels { - // Args are always on the top of the stack. Note that we should not share the args slice - // among the jump instructions since the args are modified during passes (e.g. redundant phi elimination). - args := c.nPeekDup(numArgs) - targetBlk, _ := state.brTargetArgNumFor(l) - trampoline := builder.AllocateBasicBlock() - builder.SetCurrentBlock(trampoline) - c.insertJumpToBlock(args, targetBlk) - trampolineBlockIDs = trampolineBlockIDs.Append(builder.VarLengthPool(), ssa.Value(trampoline.ID())) - } - builder.SetCurrentBlock(currentBlk) - - // If the target block has no arguments, we can just jump to the target block. - brTable := builder.AllocateInstruction() - brTable.AsBrTable(index, trampolineBlockIDs) - builder.InsertInstruction(brTable) - - for _, trampolineID := range trampolineBlockIDs.View() { - builder.Seal(builder.BasicBlock(ssa.BasicBlockID(trampolineID))) - } -} - -func (l *loweringState) brTargetArgNumFor(labelIndex uint32) (targetBlk ssa.BasicBlock, argNum int) { - targetFrame := l.ctrlPeekAt(int(labelIndex)) - if targetFrame.isLoop() { - targetBlk, argNum = targetFrame.blk, len(targetFrame.blockType.Params) - } else { - targetBlk, argNum = targetFrame.followingBlock, len(targetFrame.blockType.Results) - } - return -} - -func (c *Compiler) callListenerBefore() { - c.storeCallerModuleContext() - - builder := c.ssaBuilder - beforeListeners1stElement := builder.AllocateInstruction(). - AsLoad(c.moduleCtxPtrValue, - c.offset.BeforeListenerTrampolines1stElement.U32(), - ssa.TypeI64, - ).Insert(builder).Return() - - beforeListenerPtr := builder.AllocateInstruction(). - AsLoad(beforeListeners1stElement, uint32(c.wasmFunctionTypeIndex)*8 /* 8 bytes per index */, ssa.TypeI64).Insert(builder).Return() - - entry := builder.EntryBlock() - ps := entry.Params() - - args := c.allocateVarLengthValues(ps, c.execCtxPtrValue, - builder.AllocateInstruction().AsIconst32(c.wasmLocalFunctionIndex).Insert(builder).Return()) - for i := 2; i < ps; i++ { - args = args.Append(builder.VarLengthPool(), entry.Param(i)) - } - - beforeSig := c.listenerSignatures[c.wasmFunctionTyp][0] - builder.AllocateInstruction(). - AsCallIndirect(beforeListenerPtr, beforeSig, args). 
- Insert(builder) -} - -func (c *Compiler) callListenerAfter() { - c.storeCallerModuleContext() - - builder := c.ssaBuilder - afterListeners1stElement := builder.AllocateInstruction(). - AsLoad(c.moduleCtxPtrValue, - c.offset.AfterListenerTrampolines1stElement.U32(), - ssa.TypeI64, - ).Insert(builder).Return() - - afterListenerPtr := builder.AllocateInstruction(). - AsLoad(afterListeners1stElement, - uint32(c.wasmFunctionTypeIndex)*8 /* 8 bytes per index */, ssa.TypeI64). - Insert(builder). - Return() - - afterSig := c.listenerSignatures[c.wasmFunctionTyp][1] - args := c.allocateVarLengthValues( - c.results()+2, - c.execCtxPtrValue, - builder.AllocateInstruction().AsIconst32(c.wasmLocalFunctionIndex).Insert(builder).Return(), - ) - - l := c.state() - tail := len(l.values) - args = args.Append(c.ssaBuilder.VarLengthPool(), l.values[tail-c.results():tail]...) - builder.AllocateInstruction(). - AsCallIndirect(afterListenerPtr, afterSig, args). - Insert(builder) -} - -const ( - elementOrDataInstanceLenOffset = 8 - elementOrDataInstanceSize = 24 -) - -// dropInstance inserts instructions to drop the element/data instance specified by the given index. -func (c *Compiler) dropDataOrElementInstance(index uint32, firstItemOffset wazevoapi.Offset) { - builder := c.ssaBuilder - instPtr := c.dataOrElementInstanceAddr(index, firstItemOffset) - - zero := builder.AllocateInstruction().AsIconst64(0).Insert(builder).Return() - - // Clear the instance. - builder.AllocateInstruction().AsStore(ssa.OpcodeStore, zero, instPtr, 0).Insert(builder) - builder.AllocateInstruction().AsStore(ssa.OpcodeStore, zero, instPtr, elementOrDataInstanceLenOffset).Insert(builder) - builder.AllocateInstruction().AsStore(ssa.OpcodeStore, zero, instPtr, elementOrDataInstanceLenOffset+8).Insert(builder) -} - -func (c *Compiler) dataOrElementInstanceAddr(index uint32, firstItemOffset wazevoapi.Offset) ssa.Value { - builder := c.ssaBuilder - - _1stItemPtr := builder. - AllocateInstruction(). - AsLoad(c.moduleCtxPtrValue, firstItemOffset.U32(), ssa.TypeI64). - Insert(builder).Return() - - // Each data/element instance is a slice, so we need to multiply index by 16 to get the offset of the target instance. - index = index * elementOrDataInstanceSize - indexExt := builder.AllocateInstruction().AsIconst64(uint64(index)).Insert(builder).Return() - // Then, add the offset to the address of the instance. - instPtr := builder.AllocateInstruction().AsIadd(_1stItemPtr, indexExt).Insert(builder).Return() - return instPtr -} - -func (c *Compiler) boundsCheckInDataOrElementInstance(instPtr, offsetInInstance, copySize ssa.Value, exitCode wazevoapi.ExitCode) { - builder := c.ssaBuilder - dataInstLen := builder.AllocateInstruction(). - AsLoad(instPtr, elementOrDataInstanceLenOffset, ssa.TypeI64). - Insert(builder).Return() - ceil := builder.AllocateInstruction().AsIadd(offsetInInstance, copySize).Insert(builder).Return() - cmp := builder.AllocateInstruction(). - AsIcmp(dataInstLen, ceil, ssa.IntegerCmpCondUnsignedLessThan). - Insert(builder). - Return() - builder.AllocateInstruction(). - AsExitIfTrueWithCode(c.execCtxPtrValue, cmp, exitCode). - Insert(builder) -} - -func (c *Compiler) boundsCheckInTable(tableIndex uint32, offset, size ssa.Value) (tableInstancePtr ssa.Value) { - builder := c.ssaBuilder - dstCeil := builder.AllocateInstruction().AsIadd(offset, size).Insert(builder).Return() - - // Load the table. - tableInstancePtr = builder.AllocateInstruction(). - AsLoad(c.moduleCtxPtrValue, c.offset.TableOffset(int(tableIndex)).U32(), ssa.TypeI64). 
- Insert(builder).Return() - - // Load the table's length. - tableLen := builder.AllocateInstruction(). - AsLoad(tableInstancePtr, tableInstanceLenOffset, ssa.TypeI32).Insert(builder).Return() - tableLenExt := builder.AllocateInstruction().AsUExtend(tableLen, 32, 64).Insert(builder).Return() - - // Compare the length and the target, and trap if out of bounds. - checkOOB := builder.AllocateInstruction() - checkOOB.AsIcmp(tableLenExt, dstCeil, ssa.IntegerCmpCondUnsignedLessThan) - builder.InsertInstruction(checkOOB) - exitIfOOB := builder.AllocateInstruction() - exitIfOOB.AsExitIfTrueWithCode(c.execCtxPtrValue, checkOOB.Return(), wazevoapi.ExitCodeTableOutOfBounds) - builder.InsertInstruction(exitIfOOB) - return -} - -func (c *Compiler) loadTableBaseAddr(tableInstancePtr ssa.Value) ssa.Value { - builder := c.ssaBuilder - loadTableBaseAddress := builder. - AllocateInstruction(). - AsLoad(tableInstancePtr, tableInstanceBaseAddressOffset, ssa.TypeI64). - Insert(builder) - return loadTableBaseAddress.Return() -} - -func (c *Compiler) boundsCheckInMemory(memLen, offset, size ssa.Value) { - builder := c.ssaBuilder - ceil := builder.AllocateInstruction().AsIadd(offset, size).Insert(builder).Return() - cmp := builder.AllocateInstruction(). - AsIcmp(memLen, ceil, ssa.IntegerCmpCondUnsignedLessThan). - Insert(builder). - Return() - builder.AllocateInstruction(). - AsExitIfTrueWithCode(c.execCtxPtrValue, cmp, wazevoapi.ExitCodeMemoryOutOfBounds). - Insert(builder) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/misc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/misc.go deleted file mode 100644 index 2db2b892c..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/misc.go +++ /dev/null @@ -1,10 +0,0 @@ -package frontend - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" - "github.com/tetratelabs/wazero/internal/wasm" -) - -func FunctionIndexToFuncRef(idx wasm.Index) ssa.FuncRef { - return ssa.FuncRef(idx) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id.go deleted file mode 100644 index 5b055d127..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id.go +++ /dev/null @@ -1,13 +0,0 @@ -package frontend - -import ( - "slices" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -func sortSSAValueIDs(IDs []ssa.ValueID) { - slices.SortFunc(IDs, func(i, j ssa.ValueID) int { - return int(i) - int(j) - }) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/hostmodule.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/hostmodule.go deleted file mode 100644 index 800a5d2a8..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/hostmodule.go +++ /dev/null @@ -1,88 +0,0 @@ -package wazevo - -import ( - "encoding/binary" - "reflect" - "unsafe" - - "github.com/tetratelabs/wazero/experimental" - "github.com/tetratelabs/wazero/internal/wasm" -) - -func buildHostModuleOpaque(m *wasm.Module, listeners []experimental.FunctionListener) moduleContextOpaque { - size := len(m.CodeSection)*16 + 32 - ret := newAlignedOpaque(size) - - binary.LittleEndian.PutUint64(ret[0:], uint64(uintptr(unsafe.Pointer(m)))) - - if len(listeners) > 0 { - //nolint:staticcheck - sliceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&listeners)) - binary.LittleEndian.PutUint64(ret[8:], 
uint64(sliceHeader.Data)) - binary.LittleEndian.PutUint64(ret[16:], uint64(sliceHeader.Len)) - binary.LittleEndian.PutUint64(ret[24:], uint64(sliceHeader.Cap)) - } - - offset := 32 - for i := range m.CodeSection { - goFn := m.CodeSection[i].GoFunc - writeIface(goFn, ret[offset:]) - offset += 16 - } - return ret -} - -func hostModuleFromOpaque(opaqueBegin uintptr) *wasm.Module { - var opaqueViewOverSlice []byte - //nolint:staticcheck - sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverSlice)) - sh.Data = opaqueBegin - sh.Len = 32 - sh.Cap = 32 - return *(**wasm.Module)(unsafe.Pointer(&opaqueViewOverSlice[0])) -} - -func hostModuleListenersSliceFromOpaque(opaqueBegin uintptr) []experimental.FunctionListener { - var opaqueViewOverSlice []byte - //nolint:staticcheck - sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverSlice)) - sh.Data = opaqueBegin - sh.Len = 32 - sh.Cap = 32 - - b := binary.LittleEndian.Uint64(opaqueViewOverSlice[8:]) - l := binary.LittleEndian.Uint64(opaqueViewOverSlice[16:]) - c := binary.LittleEndian.Uint64(opaqueViewOverSlice[24:]) - var ret []experimental.FunctionListener - //nolint:staticcheck - sh = (*reflect.SliceHeader)(unsafe.Pointer(&ret)) - sh.Data = uintptr(b) - sh.Len = int(l) - sh.Cap = int(c) - return ret -} - -func hostModuleGoFuncFromOpaque[T any](index int, opaqueBegin uintptr) T { - offset := uintptr(index*16) + 32 - ptr := opaqueBegin + offset - - var opaqueViewOverFunction []byte - //nolint:staticcheck - sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverFunction)) - sh.Data = ptr - sh.Len = 16 - sh.Cap = 16 - return readIface(opaqueViewOverFunction).(T) -} - -func writeIface(goFn interface{}, buf []byte) { - goFnIface := *(*[2]uint64)(unsafe.Pointer(&goFn)) - binary.LittleEndian.PutUint64(buf, goFnIface[0]) - binary.LittleEndian.PutUint64(buf[8:], goFnIface[1]) -} - -func readIface(buf []byte) interface{} { - b := binary.LittleEndian.Uint64(buf) - s := binary.LittleEndian.Uint64(buf[8:]) - return *(*interface{})(unsafe.Pointer(&[2]uint64{b, s})) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_amd64.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_amd64.go deleted file mode 100644 index da27cc108..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_amd64.go +++ /dev/null @@ -1,30 +0,0 @@ -//go:build amd64 - -package wazevo - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64" -) - -func newMachine() backend.Machine { - return amd64.NewBackend() -} - -// unwindStack is a function to unwind the stack, and appends return addresses to `returnAddresses` slice. -// The implementation must be aligned with the ABI/Calling convention. -func unwindStack(sp, fp, top uintptr, returnAddresses []uintptr) []uintptr { - return amd64.UnwindStack(sp, fp, top, returnAddresses) -} - -// goCallStackView is a function to get a view of the stack before a Go call, which -// is the view of the stack allocated in CompileGoFunctionTrampoline. -func goCallStackView(stackPointerBeforeGoCall *uint64) []uint64 { - return amd64.GoCallStackView(stackPointerBeforeGoCall) -} - -// adjustClonedStack is a function to adjust the stack after it is grown. -// More precisely, absolute addresses (frame pointers) in the stack must be adjusted. 
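// On amd64 the saved frame pointers are absolute addresses into the old stack, so they
// must be rebased onto the newly allocated one; the arm64 variant further below stores
// stack-pointer-relative values and currently needs no such fix-up.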
-func adjustClonedStack(oldsp, oldTop, sp, fp, top uintptr) { - amd64.AdjustClonedStack(oldsp, oldTop, sp, fp, top) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_arm64.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_arm64.go deleted file mode 100644 index e7a846548..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_arm64.go +++ /dev/null @@ -1,32 +0,0 @@ -//go:build arm64 - -package wazevo - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64" -) - -func newMachine() backend.Machine { - return arm64.NewBackend() -} - -// unwindStack is a function to unwind the stack, and appends return addresses to `returnAddresses` slice. -// The implementation must be aligned with the ABI/Calling convention. -func unwindStack(sp, fp, top uintptr, returnAddresses []uintptr) []uintptr { - return arm64.UnwindStack(sp, fp, top, returnAddresses) -} - -// goCallStackView is a function to get a view of the stack before a Go call, which -// is the view of the stack allocated in CompileGoFunctionTrampoline. -func goCallStackView(stackPointerBeforeGoCall *uint64) []uint64 { - return arm64.GoCallStackView(stackPointerBeforeGoCall) -} - -// adjustClonedStack is a function to adjust the stack after it is grown. -// More precisely, absolute addresses (frame pointers) in the stack must be adjusted. -func adjustClonedStack(oldsp, oldTop, sp, fp, top uintptr) { - // TODO: currently, the frame pointers are not used, and saved old sps are relative to the current stack pointer, - // so no need to adjustment on arm64. However, when we make it absolute, which in my opinion is better perf-wise - // at the expense of slightly costly stack growth, we need to adjust the pushed frame pointers. -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_other.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_other.go deleted file mode 100644 index c5afc6314..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_other.go +++ /dev/null @@ -1,29 +0,0 @@ -//go:build !(amd64 || arm64) - -package wazevo - -import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" -) - -func newMachine() backend.Machine { - panic("unsupported architecture") -} - -// unwindStack is a function to unwind the stack, and appends return addresses to `returnAddresses` slice. -// The implementation must be aligned with the ABI/Calling convention. -func unwindStack(sp, fp, top uintptr, returnAddresses []uintptr) []uintptr { - panic("unsupported architecture") -} - -// goCallStackView is a function to get a view of the stack before a Go call, which -// is the view of the stack allocated in CompileGoFunctionTrampoline. -func goCallStackView(stackPointerBeforeGoCall *uint64) []uint64 { - panic("unsupported architecture") -} - -// adjustClonedStack is a function to adjust the stack after it is grown. -// More precisely, absolute addresses (frame pointers) in the stack must be adjusted. 
-func adjustClonedStack(oldsp, oldTop, sp, fp, top uintptr) { - panic("unsupported architecture") -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/memmove.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/memmove.go deleted file mode 100644 index 889922107..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/memmove.go +++ /dev/null @@ -1,11 +0,0 @@ -package wazevo - -import ( - "reflect" - "unsafe" -) - -//go:linkname memmove runtime.memmove -func memmove(_, _ unsafe.Pointer, _ uintptr) - -var memmovPtr = reflect.ValueOf(memmove).Pointer() diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go deleted file mode 100644 index 8811feed7..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go +++ /dev/null @@ -1,331 +0,0 @@ -package wazevo - -import ( - "encoding/binary" - "unsafe" - - "github.com/tetratelabs/wazero/api" - "github.com/tetratelabs/wazero/experimental" - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" - "github.com/tetratelabs/wazero/internal/wasm" - "github.com/tetratelabs/wazero/internal/wasmruntime" -) - -type ( - // moduleEngine implements wasm.ModuleEngine. - moduleEngine struct { - // opaquePtr equals &opaque[0]. - opaquePtr *byte - parent *compiledModule - module *wasm.ModuleInstance - opaque moduleContextOpaque - localFunctionInstances []*functionInstance - importedFunctions []importedFunction - listeners []experimental.FunctionListener - } - - functionInstance struct { - executable *byte - moduleContextOpaquePtr *byte - typeID wasm.FunctionTypeID - indexInModule wasm.Index - } - - importedFunction struct { - me *moduleEngine - indexInModule wasm.Index - } - - // moduleContextOpaque is the opaque byte slice of Module instance specific contents whose size - // is only Wasm-compile-time known, hence dynamic. Its contents are basically the pointers to the module instance, - // specific objects as well as functions. This is sometimes called "VMContext" in other Wasm runtimes. - // - // Internally, the buffer is structured as follows: - // - // type moduleContextOpaque struct { - // moduleInstance *wasm.ModuleInstance - // localMemoryBufferPtr *byte (optional) - // localMemoryLength uint64 (optional) - // importedMemoryInstance *wasm.MemoryInstance (optional) - // importedMemoryOwnerOpaqueCtx *byte (optional) - // importedFunctions [# of importedFunctions]functionInstance - // importedGlobals []ImportedGlobal (optional) - // localGlobals []Global (optional) - // typeIDsBegin &wasm.ModuleInstance.TypeIDs[0] (optional) - // tables []*wasm.TableInstance (optional) - // beforeListenerTrampolines1stElement **byte (optional) - // afterListenerTrampolines1stElement **byte (optional) - // dataInstances1stElement []wasm.DataInstance (optional) - // elementInstances1stElement []wasm.ElementInstance (optional) - // } - // - // type ImportedGlobal struct { - // *Global - // _ uint64 // padding - // } - // - // type Global struct { - // Val, ValHi uint64 - // } - // - // See wazevoapi.NewModuleContextOffsetData for the details of the offsets. - // - // Note that for host modules, the structure is entirely different. See buildHostModuleOpaque. - moduleContextOpaque []byte -) - -func newAlignedOpaque(size int) moduleContextOpaque { - // Check if the size is a multiple of 16. 
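// The allocation below is padded by 16 bytes and re-sliced so that &buf[0] is 16-byte
// aligned: with rem = address % 16, advancing the slice by 16-rem bytes yields an
// aligned start while keeping at least `size` usable bytes (a full 16 bytes are
// skipped when the buffer happens to be aligned already).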
- if size%16 != 0 { - panic("size must be a multiple of 16") - } - buf := make([]byte, size+16) - // Align the buffer to 16 bytes. - rem := uintptr(unsafe.Pointer(&buf[0])) % 16 - buf = buf[16-rem:] - return buf -} - -func (m *moduleEngine) setupOpaque() { - inst := m.module - offsets := &m.parent.offsets - opaque := m.opaque - - binary.LittleEndian.PutUint64(opaque[offsets.ModuleInstanceOffset:], - uint64(uintptr(unsafe.Pointer(m.module))), - ) - - if lm := offsets.LocalMemoryBegin; lm >= 0 { - m.putLocalMemory() - } - - // Note: imported memory is resolved in ResolveImportedFunction. - - // Note: imported functions are resolved in ResolveImportedFunction. - - if globalOffset := offsets.GlobalsBegin; globalOffset >= 0 { - for i, g := range inst.Globals { - if i < int(inst.Source.ImportGlobalCount) { - importedME := g.Me.(*moduleEngine) - offset := importedME.parent.offsets.GlobalInstanceOffset(g.Index) - importedMEOpaque := importedME.opaque - binary.LittleEndian.PutUint64(opaque[globalOffset:], - uint64(uintptr(unsafe.Pointer(&importedMEOpaque[offset])))) - } else { - binary.LittleEndian.PutUint64(opaque[globalOffset:], g.Val) - binary.LittleEndian.PutUint64(opaque[globalOffset+8:], g.ValHi) - } - globalOffset += 16 - } - } - - if tableOffset := offsets.TablesBegin; tableOffset >= 0 { - // First we write the first element's address of typeIDs. - if len(inst.TypeIDs) > 0 { - binary.LittleEndian.PutUint64(opaque[offsets.TypeIDs1stElement:], uint64(uintptr(unsafe.Pointer(&inst.TypeIDs[0])))) - } - - // Then we write the table addresses. - for _, table := range inst.Tables { - binary.LittleEndian.PutUint64(opaque[tableOffset:], uint64(uintptr(unsafe.Pointer(table)))) - tableOffset += 8 - } - } - - if beforeListenerOffset := offsets.BeforeListenerTrampolines1stElement; beforeListenerOffset >= 0 { - binary.LittleEndian.PutUint64(opaque[beforeListenerOffset:], uint64(uintptr(unsafe.Pointer(&m.parent.listenerBeforeTrampolines[0])))) - } - if afterListenerOffset := offsets.AfterListenerTrampolines1stElement; afterListenerOffset >= 0 { - binary.LittleEndian.PutUint64(opaque[afterListenerOffset:], uint64(uintptr(unsafe.Pointer(&m.parent.listenerAfterTrampolines[0])))) - } - if len(inst.DataInstances) > 0 { - binary.LittleEndian.PutUint64(opaque[offsets.DataInstances1stElement:], uint64(uintptr(unsafe.Pointer(&inst.DataInstances[0])))) - } - if len(inst.ElementInstances) > 0 { - binary.LittleEndian.PutUint64(opaque[offsets.ElementInstances1stElement:], uint64(uintptr(unsafe.Pointer(&inst.ElementInstances[0])))) - } -} - -// NewFunction implements wasm.ModuleEngine. 
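The 16-byte alignment in newAlignedOpaque above is just over-allocation plus re-slicing. A standalone sketch of the same trick, with an illustrative name rather than anything from wazero:

package main

import (
	"fmt"
	"unsafe"
)

// alignedBuf returns a slice of at least size bytes whose first element sits
// on a 16-byte boundary, using the same over-allocate-then-reslice approach
// as newAlignedOpaque.
func alignedBuf(size int) []byte {
	if size%16 != 0 {
		panic("size must be a multiple of 16")
	}
	buf := make([]byte, size+16)
	rem := uintptr(unsafe.Pointer(&buf[0])) % 16
	return buf[16-rem:]
}

func main() {
	b := alignedBuf(64)
	fmt.Println(uintptr(unsafe.Pointer(&b[0]))%16 == 0) // always prints true
}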
-func (m *moduleEngine) NewFunction(index wasm.Index) api.Function { - if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable { - panic("When PrintMachineCodeHexPerFunctionDisassemblable enabled, functions must not be called") - } - - localIndex := index - if importedFnCount := m.module.Source.ImportFunctionCount; index < importedFnCount { - imported := &m.importedFunctions[index] - return imported.me.NewFunction(imported.indexInModule) - } else { - localIndex -= importedFnCount - } - - src := m.module.Source - typIndex := src.FunctionSection[localIndex] - typ := src.TypeSection[typIndex] - sizeOfParamResultSlice := typ.ResultNumInUint64 - if ps := typ.ParamNumInUint64; ps > sizeOfParamResultSlice { - sizeOfParamResultSlice = ps - } - p := m.parent - offset := p.functionOffsets[localIndex] - - ce := &callEngine{ - indexInModule: index, - executable: &p.executable[offset], - parent: m, - preambleExecutable: &m.parent.entryPreambles[typIndex][0], - sizeOfParamResultSlice: sizeOfParamResultSlice, - requiredParams: typ.ParamNumInUint64, - numberOfResults: typ.ResultNumInUint64, - } - - ce.execCtx.memoryGrowTrampolineAddress = &m.parent.sharedFunctions.memoryGrowExecutable[0] - ce.execCtx.stackGrowCallTrampolineAddress = &m.parent.sharedFunctions.stackGrowExecutable[0] - ce.execCtx.checkModuleExitCodeTrampolineAddress = &m.parent.sharedFunctions.checkModuleExitCode[0] - ce.execCtx.tableGrowTrampolineAddress = &m.parent.sharedFunctions.tableGrowExecutable[0] - ce.execCtx.refFuncTrampolineAddress = &m.parent.sharedFunctions.refFuncExecutable[0] - ce.execCtx.memoryWait32TrampolineAddress = &m.parent.sharedFunctions.memoryWait32Executable[0] - ce.execCtx.memoryWait64TrampolineAddress = &m.parent.sharedFunctions.memoryWait64Executable[0] - ce.execCtx.memoryNotifyTrampolineAddress = &m.parent.sharedFunctions.memoryNotifyExecutable[0] - ce.execCtx.memmoveAddress = memmovPtr - ce.init() - return ce -} - -// GetGlobalValue implements the same method as documented on wasm.ModuleEngine. -func (m *moduleEngine) GetGlobalValue(i wasm.Index) (lo, hi uint64) { - offset := m.parent.offsets.GlobalInstanceOffset(i) - buf := m.opaque[offset:] - if i < m.module.Source.ImportGlobalCount { - panic("GetGlobalValue should not be called for imported globals") - } - return binary.LittleEndian.Uint64(buf), binary.LittleEndian.Uint64(buf[8:]) -} - -// SetGlobalValue implements the same method as documented on wasm.ModuleEngine. -func (m *moduleEngine) SetGlobalValue(i wasm.Index, lo, hi uint64) { - offset := m.parent.offsets.GlobalInstanceOffset(i) - buf := m.opaque[offset:] - if i < m.module.Source.ImportGlobalCount { - panic("GetGlobalValue should not be called for imported globals") - } - binary.LittleEndian.PutUint64(buf, lo) - binary.LittleEndian.PutUint64(buf[8:], hi) -} - -// OwnsGlobals implements the same method as documented on wasm.ModuleEngine. -func (m *moduleEngine) OwnsGlobals() bool { return true } - -// MemoryGrown implements wasm.ModuleEngine. -func (m *moduleEngine) MemoryGrown() { - m.putLocalMemory() -} - -// putLocalMemory writes the local memory buffer pointer and length to the opaque buffer. 
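GetGlobalValue and SetGlobalValue treat each non-imported global as a pair of little-endian uint64 slots (lo, hi) inside the opaque buffer. A minimal sketch of that 16-bytes-per-global layout, assuming a flat buffer starting at offset 0 instead of wazero's computed offsets:

package main

import (
	"encoding/binary"
	"fmt"
)

// setGlobal writes the (lo, hi) pair of global i into a flat byte buffer,
// assuming a simple layout of 16 bytes per global.
func setGlobal(buf []byte, i int, lo, hi uint64) {
	off := i * 16
	binary.LittleEndian.PutUint64(buf[off:], lo)
	binary.LittleEndian.PutUint64(buf[off+8:], hi)
}

// getGlobal reads the pair back from the same layout.
func getGlobal(buf []byte, i int) (lo, hi uint64) {
	off := i * 16
	return binary.LittleEndian.Uint64(buf[off:]), binary.LittleEndian.Uint64(buf[off+8:])
}

func main() {
	buf := make([]byte, 16*4) // room for 4 globals
	setGlobal(buf, 2, 0xdeadbeef, 0x1)
	lo, hi := getGlobal(buf, 2)
	fmt.Printf("lo=%#x hi=%#x\n", lo, hi)
}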
-func (m *moduleEngine) putLocalMemory() { - mem := m.module.MemoryInstance - offset := m.parent.offsets.LocalMemoryBegin - - s := uint64(len(mem.Buffer)) - var b uint64 - if len(mem.Buffer) > 0 { - b = uint64(uintptr(unsafe.Pointer(&mem.Buffer[0]))) - } - binary.LittleEndian.PutUint64(m.opaque[offset:], b) - binary.LittleEndian.PutUint64(m.opaque[offset+8:], s) -} - -// ResolveImportedFunction implements wasm.ModuleEngine. -func (m *moduleEngine) ResolveImportedFunction(index, descFunc, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) { - executableOffset, moduleCtxOffset, typeIDOffset := m.parent.offsets.ImportedFunctionOffset(index) - importedME := importedModuleEngine.(*moduleEngine) - - if int(indexInImportedModule) >= len(importedME.importedFunctions) { - indexInImportedModule -= wasm.Index(len(importedME.importedFunctions)) - } else { - imported := &importedME.importedFunctions[indexInImportedModule] - m.ResolveImportedFunction(index, descFunc, imported.indexInModule, imported.me) - return // Recursively resolve the imported function. - } - - offset := importedME.parent.functionOffsets[indexInImportedModule] - typeID := m.module.TypeIDs[descFunc] - executable := &importedME.parent.executable[offset] - // Write functionInstance. - binary.LittleEndian.PutUint64(m.opaque[executableOffset:], uint64(uintptr(unsafe.Pointer(executable)))) - binary.LittleEndian.PutUint64(m.opaque[moduleCtxOffset:], uint64(uintptr(unsafe.Pointer(importedME.opaquePtr)))) - binary.LittleEndian.PutUint64(m.opaque[typeIDOffset:], uint64(typeID)) - - // Write importedFunction so that it can be used by NewFunction. - m.importedFunctions[index] = importedFunction{me: importedME, indexInModule: indexInImportedModule} -} - -// ResolveImportedMemory implements wasm.ModuleEngine. -func (m *moduleEngine) ResolveImportedMemory(importedModuleEngine wasm.ModuleEngine) { - importedME := importedModuleEngine.(*moduleEngine) - inst := importedME.module - - var memInstPtr uint64 - var memOwnerOpaquePtr uint64 - if offs := importedME.parent.offsets; offs.ImportedMemoryBegin >= 0 { - offset := offs.ImportedMemoryBegin - memInstPtr = binary.LittleEndian.Uint64(importedME.opaque[offset:]) - memOwnerOpaquePtr = binary.LittleEndian.Uint64(importedME.opaque[offset+8:]) - } else { - memInstPtr = uint64(uintptr(unsafe.Pointer(inst.MemoryInstance))) - memOwnerOpaquePtr = uint64(uintptr(unsafe.Pointer(importedME.opaquePtr))) - } - offset := m.parent.offsets.ImportedMemoryBegin - binary.LittleEndian.PutUint64(m.opaque[offset:], memInstPtr) - binary.LittleEndian.PutUint64(m.opaque[offset+8:], memOwnerOpaquePtr) -} - -// DoneInstantiation implements wasm.ModuleEngine. -func (m *moduleEngine) DoneInstantiation() { - if !m.module.Source.IsHostModule { - m.setupOpaque() - } -} - -// FunctionInstanceReference implements wasm.ModuleEngine. 
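putLocalMemory republishes the memory buffer's base address and byte length so generated code always sees the current buffer, e.g. after a grow reallocates it. A standalone sketch of that publication step, with an illustrative two-slot header rather than the real opaque layout:

package main

import (
	"encoding/binary"
	"fmt"
	"unsafe"
)

// publish writes the base address and length of mem into two little-endian
// uint64 slots, mirroring what putLocalMemory does for the local memory.
func publish(header []byte, mem []byte) {
	var base uint64
	if len(mem) > 0 {
		base = uint64(uintptr(unsafe.Pointer(&mem[0])))
	}
	binary.LittleEndian.PutUint64(header[0:], base)
	binary.LittleEndian.PutUint64(header[8:], uint64(len(mem)))
}

func main() {
	header := make([]byte, 16)
	mem := make([]byte, 65536)
	publish(header, mem)

	// After a grow, the buffer may be reallocated, so it must be re-published.
	mem = append(mem, make([]byte, 65536)...)
	publish(header, mem)
	fmt.Printf("len=%d\n", binary.LittleEndian.Uint64(header[8:]))
}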
-func (m *moduleEngine) FunctionInstanceReference(funcIndex wasm.Index) wasm.Reference { - if funcIndex < m.module.Source.ImportFunctionCount { - begin, _, _ := m.parent.offsets.ImportedFunctionOffset(funcIndex) - return uintptr(unsafe.Pointer(&m.opaque[begin])) - } - localIndex := funcIndex - m.module.Source.ImportFunctionCount - p := m.parent - executable := &p.executable[p.functionOffsets[localIndex]] - typeID := m.module.TypeIDs[m.module.Source.FunctionSection[localIndex]] - - lf := &functionInstance{ - executable: executable, - moduleContextOpaquePtr: m.opaquePtr, - typeID: typeID, - indexInModule: funcIndex, - } - m.localFunctionInstances = append(m.localFunctionInstances, lf) - return uintptr(unsafe.Pointer(lf)) -} - -// LookupFunction implements wasm.ModuleEngine. -func (m *moduleEngine) LookupFunction(t *wasm.TableInstance, typeId wasm.FunctionTypeID, tableOffset wasm.Index) (*wasm.ModuleInstance, wasm.Index) { - if tableOffset >= uint32(len(t.References)) || t.Type != wasm.RefTypeFuncref { - panic(wasmruntime.ErrRuntimeInvalidTableAccess) - } - rawPtr := t.References[tableOffset] - if rawPtr == 0 { - panic(wasmruntime.ErrRuntimeInvalidTableAccess) - } - - tf := wazevoapi.PtrFromUintptr[functionInstance](rawPtr) - if tf.typeID != typeId { - panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch) - } - return moduleInstanceFromOpaquePtr(tf.moduleContextOpaquePtr), tf.indexInModule -} - -func moduleInstanceFromOpaquePtr(ptr *byte) *wasm.ModuleInstance { - return *(**wasm.ModuleInstance)(unsafe.Pointer(ptr)) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go deleted file mode 100644 index cf7f14d3b..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go +++ /dev/null @@ -1,379 +0,0 @@ -package ssa - -import ( - "fmt" - "strconv" - "strings" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -// BasicBlock represents the Basic Block of an SSA function. -// Each BasicBlock always ends with branching instructions (e.g. Branch, Return, etc.), -// and at most two branches are allowed. If there's two branches, these two are placed together at the end of the block. -// In other words, there's no branching instruction in the middle of the block. -// -// Note: we use the "block argument" variant of SSA, instead of PHI functions. See the package level doc comments. -// -// Note: we use "parameter/param" as a placeholder which represents a variant of PHI, and "argument/arg" as an actual -// Value passed to that "parameter/param". -type BasicBlock interface { - // ID returns the unique ID of this block. - ID() BasicBlockID - - // Name returns the unique string ID of this block. e.g. blk0, blk1, ... - Name() string - - // AddParam adds the parameter to the block whose type specified by `t`. - AddParam(b Builder, t Type) Value - - // Params returns the number of parameters to this block. - Params() int - - // Param returns (Variable, Value) which corresponds to the i-th parameter of this block. - // The returned Value is the definition of the param in this block. - Param(i int) Value - - // Root returns the root instruction of this block. - Root() *Instruction - - // Tail returns the tail instruction of this block. - Tail() *Instruction - - // EntryBlock returns true if this block represents the function entry. - EntryBlock() bool - - // ReturnBlock returns ture if this block represents the function return. 
- ReturnBlock() bool - - // Valid is true if this block is still valid even after optimizations. - Valid() bool - - // Sealed is true if this block has been sealed. - Sealed() bool - - // Preds returns the number of predecessors of this block. - Preds() int - - // Pred returns the i-th predecessor of this block. - Pred(i int) BasicBlock - - // Succs returns the number of successors of this block. - Succs() int - - // Succ returns the i-th successor of this block. - Succ(i int) BasicBlock - - // LoopHeader returns true if this block is a loop header. - LoopHeader() bool - - // LoopNestingForestChildren returns the children of this block in the loop nesting forest. - LoopNestingForestChildren() []BasicBlock -} - -type ( - // basicBlock is a basic block in a SSA-transformed function. - basicBlock struct { - id BasicBlockID - rootInstr, currentInstr *Instruction - // params are Values that represent parameters to a basicBlock. - // Each parameter can be considered as an output of PHI instruction in traditional SSA. - params Values - preds []basicBlockPredecessorInfo - success []*basicBlock - // singlePred is the alias to preds[0] for fast lookup, and only set after Seal is called. - singlePred *basicBlock - // lastDefinitions maps Variable to its last definition in this block. - lastDefinitions map[Variable]Value - // unknownsValues are used in builder.findValue. The usage is well-described in the paper. - unknownValues []unknownValue - // invalid is true if this block is made invalid during optimizations. - invalid bool - // sealed is true if this is sealed (all the predecessors are known). - sealed bool - // loopHeader is true if this block is a loop header: - // - // > A loop header (sometimes called the entry point of the loop) is a dominator that is the target - // > of a loop-forming back edge. The loop header dominates all blocks in the loop body. - // > A block may be a loop header for more than one loop. A loop may have multiple entry points, - // > in which case it has no "loop header". - // - // See https://en.wikipedia.org/wiki/Control-flow_graph for more details. - // - // This is modified during the subPassLoopDetection pass. - loopHeader bool - - // loopNestingForestChildren holds the children of this block in the loop nesting forest. - // Non-empty if and only if this block is a loop header (i.e. loopHeader=true) - loopNestingForestChildren wazevoapi.VarLength[BasicBlock] - - // reversePostOrder is used to sort all the blocks in the function in reverse post order. - // This is used in builder.LayoutBlocks. - reversePostOrder int32 - - // visited is used during various traversals. - visited int32 - - // child and sibling are the ones in the dominator tree. - child, sibling *basicBlock - } - // BasicBlockID is the unique ID of a basicBlock. - BasicBlockID uint32 - - unknownValue struct { - // variable is the variable that this unknownValue represents. - variable Variable - // value is the value that this unknownValue represents. - value Value - } -) - -// basicBlockVarLengthNil is the default nil value for basicBlock.loopNestingForestChildren. -var basicBlockVarLengthNil = wazevoapi.NewNilVarLength[BasicBlock]() - -const basicBlockIDReturnBlock = 0xffffffff - -// Name implements BasicBlock.Name. -func (bb *basicBlock) Name() string { - if bb.id == basicBlockIDReturnBlock { - return "blk_ret" - } else { - return fmt.Sprintf("blk%d", bb.id) - } -} - -// String implements fmt.Stringer for debugging. 
-func (bid BasicBlockID) String() string { - if bid == basicBlockIDReturnBlock { - return "blk_ret" - } else { - return fmt.Sprintf("blk%d", bid) - } -} - -// ID implements BasicBlock.ID. -func (bb *basicBlock) ID() BasicBlockID { - return bb.id -} - -// basicBlockPredecessorInfo is the information of a predecessor of a basicBlock. -// predecessor is determined by a pair of block and the branch instruction used to jump to the successor. -type basicBlockPredecessorInfo struct { - blk *basicBlock - branch *Instruction -} - -// EntryBlock implements BasicBlock.EntryBlock. -func (bb *basicBlock) EntryBlock() bool { - return bb.id == 0 -} - -// ReturnBlock implements BasicBlock.ReturnBlock. -func (bb *basicBlock) ReturnBlock() bool { - return bb.id == basicBlockIDReturnBlock -} - -// AddParam implements BasicBlock.AddParam. -func (bb *basicBlock) AddParam(b Builder, typ Type) Value { - paramValue := b.allocateValue(typ) - bb.params = bb.params.Append(&b.(*builder).varLengthPool, paramValue) - return paramValue -} - -// addParamOn adds a parameter to this block whose value is already allocated. -func (bb *basicBlock) addParamOn(b *builder, value Value) { - bb.params = bb.params.Append(&b.varLengthPool, value) -} - -// Params implements BasicBlock.Params. -func (bb *basicBlock) Params() int { - return len(bb.params.View()) -} - -// Param implements BasicBlock.Param. -func (bb *basicBlock) Param(i int) Value { - return bb.params.View()[i] -} - -// Valid implements BasicBlock.Valid. -func (bb *basicBlock) Valid() bool { - return !bb.invalid -} - -// Sealed implements BasicBlock.Sealed. -func (bb *basicBlock) Sealed() bool { - return bb.sealed -} - -// insertInstruction implements BasicBlock.InsertInstruction. -func (bb *basicBlock) insertInstruction(b *builder, next *Instruction) { - current := bb.currentInstr - if current != nil { - current.next = next - next.prev = current - } else { - bb.rootInstr = next - } - bb.currentInstr = next - - switch next.opcode { - case OpcodeJump, OpcodeBrz, OpcodeBrnz: - target := BasicBlockID(next.rValue) - b.basicBlock(target).addPred(bb, next) - case OpcodeBrTable: - for _, _target := range next.rValues.View() { - target := BasicBlockID(_target) - b.basicBlock(target).addPred(bb, next) - } - } -} - -// NumPreds implements BasicBlock.NumPreds. -func (bb *basicBlock) NumPreds() int { - return len(bb.preds) -} - -// Preds implements BasicBlock.Preds. -func (bb *basicBlock) Preds() int { - return len(bb.preds) -} - -// Pred implements BasicBlock.Pred. -func (bb *basicBlock) Pred(i int) BasicBlock { - return bb.preds[i].blk -} - -// Succs implements BasicBlock.Succs. -func (bb *basicBlock) Succs() int { - return len(bb.success) -} - -// Succ implements BasicBlock.Succ. -func (bb *basicBlock) Succ(i int) BasicBlock { - return bb.success[i] -} - -// Root implements BasicBlock.Root. -func (bb *basicBlock) Root() *Instruction { - return bb.rootInstr -} - -// Tail implements BasicBlock.Tail. -func (bb *basicBlock) Tail() *Instruction { - return bb.currentInstr -} - -// reset resets the basicBlock to its initial state so that it can be reused for another function. 
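insertInstruction appends to an intrusive doubly-linked list anchored by rootInstr and currentInstr. A simplified standalone version of that append, with illustrative type and field names:

package main

import "fmt"

// node mimics how an Instruction carries its own prev/next links.
type node struct {
	name       string
	prev, next *node
}

// block keeps the first and last node, like rootInstr/currentInstr.
type block struct{ root, current *node }

// insert appends n at the tail, wiring both directions of the list.
func (b *block) insert(n *node) {
	if cur := b.current; cur != nil {
		cur.next = n
		n.prev = cur
	} else {
		b.root = n
	}
	b.current = n
}

func main() {
	b := &block{}
	b.insert(&node{name: "iconst"})
	b.insert(&node{name: "iadd"})
	b.insert(&node{name: "return"})
	for n := b.root; n != nil; n = n.next {
		fmt.Println(n.name)
	}
}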
-func resetBasicBlock(bb *basicBlock) { - bb.params = ValuesNil - bb.rootInstr, bb.currentInstr = nil, nil - bb.preds = bb.preds[:0] - bb.success = bb.success[:0] - bb.invalid, bb.sealed = false, false - bb.singlePred = nil - bb.unknownValues = bb.unknownValues[:0] - bb.lastDefinitions = wazevoapi.ResetMap(bb.lastDefinitions) - bb.reversePostOrder = -1 - bb.visited = 0 - bb.loopNestingForestChildren = basicBlockVarLengthNil - bb.loopHeader = false - bb.sibling = nil - bb.child = nil -} - -// addPred adds a predecessor to this block specified by the branch instruction. -func (bb *basicBlock) addPred(blk BasicBlock, branch *Instruction) { - if bb.sealed { - panic("BUG: trying to add predecessor to a sealed block: " + bb.Name()) - } - - pred := blk.(*basicBlock) - for i := range bb.preds { - existingPred := &bb.preds[i] - if existingPred.blk == pred && existingPred.branch != branch { - // If the target is already added, then this must come from the same BrTable, - // otherwise such redundant branch should be eliminated by the frontend. (which should be simpler). - panic(fmt.Sprintf("BUG: redundant non BrTable jumps in %s whose targes are the same", bb.Name())) - } - } - - bb.preds = append(bb.preds, basicBlockPredecessorInfo{ - blk: pred, - branch: branch, - }) - - pred.success = append(pred.success, bb) -} - -// formatHeader returns the string representation of the header of the basicBlock. -func (bb *basicBlock) formatHeader(b Builder) string { - ps := make([]string, len(bb.params.View())) - for i, p := range bb.params.View() { - ps[i] = p.formatWithType(b) - } - - if len(bb.preds) > 0 { - preds := make([]string, 0, len(bb.preds)) - for _, pred := range bb.preds { - if pred.blk.invalid { - continue - } - preds = append(preds, fmt.Sprintf("blk%d", pred.blk.id)) - - } - return fmt.Sprintf("blk%d: (%s) <-- (%s)", - bb.id, strings.Join(ps, ","), strings.Join(preds, ",")) - } else { - return fmt.Sprintf("blk%d: (%s)", bb.id, strings.Join(ps, ", ")) - } -} - -// validates validates the basicBlock for debugging purpose. -func (bb *basicBlock) validate(b *builder) { - if bb.invalid { - panic("BUG: trying to validate an invalid block: " + bb.Name()) - } - if len(bb.preds) > 0 { - for _, pred := range bb.preds { - if pred.branch.opcode != OpcodeBrTable { - blockID := int(pred.branch.rValue) - target := b.basicBlocksPool.View(blockID) - if target != bb { - panic(fmt.Sprintf("BUG: '%s' is not branch to %s, but to %s", - pred.branch.Format(b), bb.Name(), target.Name())) - } - } - - var exp int - if bb.ReturnBlock() { - exp = len(b.currentSignature.Results) - } else { - exp = len(bb.params.View()) - } - - if len(pred.branch.vs.View()) != exp { - panic(fmt.Sprintf( - "BUG: len(argument at %s) != len(params at %s): %d != %d: %s", - pred.blk.Name(), bb.Name(), - len(pred.branch.vs.View()), len(bb.params.View()), pred.branch.Format(b), - )) - } - - } - } -} - -// String implements fmt.Stringer for debugging purpose only. -func (bb *basicBlock) String() string { - return strconv.Itoa(int(bb.id)) -} - -// LoopNestingForestChildren implements BasicBlock.LoopNestingForestChildren. -func (bb *basicBlock) LoopNestingForestChildren() []BasicBlock { - return bb.loopNestingForestChildren.View() -} - -// LoopHeader implements BasicBlock.LoopHeader. 
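addPred records each CFG edge on both endpoints, so Preds and Succs stay consistent by construction. A stripped-down sketch of that two-sided bookkeeping, omitting the branch-instruction pairing and the sealed-block check:

package main

import "fmt"

type block struct {
	name  string
	preds []*block
	succs []*block
}

// addEdge wires the edge pred -> succ on both blocks at once.
func addEdge(pred, succ *block) {
	succ.preds = append(succ.preds, pred)
	pred.succs = append(pred.succs, succ)
}

func main() {
	entry, body, exit := &block{name: "blk0"}, &block{name: "blk1"}, &block{name: "blk2"}
	addEdge(entry, body)
	addEdge(body, body) // loop back edge
	addEdge(body, exit)
	fmt.Println(len(body.preds), len(body.succs)) // 2 2
}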
-func (bb *basicBlock) LoopHeader() bool { - return bb.loopHeader -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort.go deleted file mode 100644 index fb98298f7..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort.go +++ /dev/null @@ -1,32 +0,0 @@ -package ssa - -import ( - "slices" -) - -func sortBlocks(blocks []*basicBlock) { - slices.SortFunc(blocks, func(i, j *basicBlock) int { - jIsReturn := j.ReturnBlock() - iIsReturn := i.ReturnBlock() - if iIsReturn && jIsReturn { - return 0 - } - if jIsReturn { - return 1 - } - if iIsReturn { - return -1 - } - iRoot, jRoot := i.rootInstr, j.rootInstr - if iRoot == nil && jRoot == nil { // For testing. - return 0 - } - if jRoot == nil { - return 1 - } - if iRoot == nil { - return -1 - } - return i.rootInstr.id - j.rootInstr.id - }) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go deleted file mode 100644 index 43dd7d292..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go +++ /dev/null @@ -1,790 +0,0 @@ -package ssa - -import ( - "fmt" - "sort" - "strings" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -// Builder is used to builds SSA consisting of Basic Blocks per function. -type Builder interface { - // Init must be called to reuse this builder for the next function. - Init(typ *Signature) - - // Signature returns the Signature of the currently-compiled function. - Signature() *Signature - - // BlockIDMax returns the maximum value of BasicBlocksID existing in the currently-compiled function. - BlockIDMax() BasicBlockID - - // AllocateBasicBlock creates a basic block in SSA function. - AllocateBasicBlock() BasicBlock - - // CurrentBlock returns the currently handled BasicBlock which is set by the latest call to SetCurrentBlock. - CurrentBlock() BasicBlock - - // EntryBlock returns the entry BasicBlock of the currently-compiled function. - EntryBlock() BasicBlock - - // SetCurrentBlock sets the instruction insertion target to the BasicBlock `b`. - SetCurrentBlock(b BasicBlock) - - // DeclareVariable declares a Variable of the given Type. - DeclareVariable(Type) Variable - - // DefineVariable defines a variable in the `block` with value. - // The defining instruction will be inserted into the `block`. - DefineVariable(variable Variable, value Value, block BasicBlock) - - // DefineVariableInCurrentBB is the same as DefineVariable except the definition is - // inserted into the current BasicBlock. Alias to DefineVariable(x, y, CurrentBlock()). - DefineVariableInCurrentBB(variable Variable, value Value) - - // AllocateInstruction returns a new Instruction. - AllocateInstruction() *Instruction - - // InsertInstruction executes BasicBlock.InsertInstruction for the currently handled basic block. - InsertInstruction(raw *Instruction) - - // allocateValue allocates an unused Value. - allocateValue(typ Type) Value - - // MustFindValue searches the latest definition of the given Variable and returns the result. - MustFindValue(variable Variable) Value - - // FindValueInLinearPath tries to find the latest definition of the given Variable in the linear path to the current BasicBlock. - // If it cannot find the definition, or it's not sealed yet, it returns ValueInvalid. 
- FindValueInLinearPath(variable Variable) Value - - // Seal declares that we've known all the predecessors to this block and were added via AddPred. - // After calling this, AddPred will be forbidden. - Seal(blk BasicBlock) - - // AnnotateValue is for debugging purpose. - AnnotateValue(value Value, annotation string) - - // DeclareSignature appends the *Signature to be referenced by various instructions (e.g. OpcodeCall). - DeclareSignature(signature *Signature) - - // Signatures returns the slice of declared Signatures. - Signatures() []*Signature - - // ResolveSignature returns the Signature which corresponds to SignatureID. - ResolveSignature(id SignatureID) *Signature - - // RunPasses runs various passes on the constructed SSA function. - RunPasses() - - // Format returns the debugging string of the SSA function. - Format() string - - // BlockIteratorBegin initializes the state to iterate over all the valid BasicBlock(s) compiled. - // Combined with BlockIteratorNext, we can use this like: - // - // for blk := builder.BlockIteratorBegin(); blk != nil; blk = builder.BlockIteratorNext() { - // // ... - // } - // - // The returned blocks are ordered in the order of AllocateBasicBlock being called. - BlockIteratorBegin() BasicBlock - - // BlockIteratorNext advances the state for iteration initialized by BlockIteratorBegin. - // Returns nil if there's no unseen BasicBlock. - BlockIteratorNext() BasicBlock - - // ValuesInfo returns the data per Value used to lower the SSA in backend. - // This is indexed by ValueID. - ValuesInfo() []ValueInfo - - // BlockIteratorReversePostOrderBegin is almost the same as BlockIteratorBegin except it returns the BasicBlock in the reverse post-order. - // This is available after RunPasses is run. - BlockIteratorReversePostOrderBegin() BasicBlock - - // BlockIteratorReversePostOrderNext is almost the same as BlockIteratorPostOrderNext except it returns the BasicBlock in the reverse post-order. - // This is available after RunPasses is run. - BlockIteratorReversePostOrderNext() BasicBlock - - // ReturnBlock returns the BasicBlock which is used to return from the function. - ReturnBlock() BasicBlock - - // InsertUndefined inserts an undefined instruction at the current position. - InsertUndefined() - - // SetCurrentSourceOffset sets the current source offset. The incoming instruction will be annotated with this offset. - SetCurrentSourceOffset(line SourceOffset) - - // LoopNestingForestRoots returns the roots of the loop nesting forest. - LoopNestingForestRoots() []BasicBlock - - // LowestCommonAncestor returns the lowest common ancestor in the dominator tree of the given BasicBlock(s). - LowestCommonAncestor(blk1, blk2 BasicBlock) BasicBlock - - // Idom returns the immediate dominator of the given BasicBlock. - Idom(blk BasicBlock) BasicBlock - - // VarLengthPool returns the VarLengthPool of Value. - VarLengthPool() *wazevoapi.VarLengthPool[Value] - - // InsertZeroValue inserts a zero value constant instruction of the given type. - InsertZeroValue(t Type) - - // BasicBlock returns the BasicBlock of the given ID. - BasicBlock(id BasicBlockID) BasicBlock - - // InstructionOfValue returns the Instruction that produces the given Value or nil if the Value is not produced by any Instruction. - InstructionOfValue(v Value) *Instruction -} - -// NewBuilder returns a new Builder implementation. 
-func NewBuilder() Builder { - return &builder{ - instructionsPool: wazevoapi.NewPool[Instruction](resetInstruction), - basicBlocksPool: wazevoapi.NewPool[basicBlock](resetBasicBlock), - varLengthBasicBlockPool: wazevoapi.NewVarLengthPool[BasicBlock](), - varLengthPool: wazevoapi.NewVarLengthPool[Value](), - valueAnnotations: make(map[ValueID]string), - signatures: make(map[SignatureID]*Signature), - returnBlk: &basicBlock{id: basicBlockIDReturnBlock}, - } -} - -// builder implements Builder interface. -type builder struct { - basicBlocksPool wazevoapi.Pool[basicBlock] - instructionsPool wazevoapi.Pool[Instruction] - varLengthPool wazevoapi.VarLengthPool[Value] - signatures map[SignatureID]*Signature - currentSignature *Signature - - // reversePostOrderedBasicBlocks are the BasicBlock(s) ordered in the reverse post-order after passCalculateImmediateDominators. - reversePostOrderedBasicBlocks []*basicBlock - currentBB *basicBlock - returnBlk *basicBlock - - // nextValueID is used by builder.AllocateValue. - nextValueID ValueID - // nextVariable is used by builder.AllocateVariable. - nextVariable Variable - - // valueAnnotations contains the annotations for each Value, only used for debugging. - valueAnnotations map[ValueID]string - - // valuesInfo contains the data per Value used to lower the SSA in backend. This is indexed by ValueID. - valuesInfo []ValueInfo - - // dominators stores the immediate dominator of each BasicBlock. - // The index is blockID of the BasicBlock. - dominators []*basicBlock - sparseTree dominatorSparseTree - - varLengthBasicBlockPool wazevoapi.VarLengthPool[BasicBlock] - - // loopNestingForestRoots are the roots of the loop nesting forest. - loopNestingForestRoots []BasicBlock - - // The followings are used for optimization passes/deterministic compilation. - instStack []*Instruction - blkStack []*basicBlock - blkStack2 []*basicBlock - redundantParams []redundantParam - - // blockIterCur is used to implement blockIteratorBegin and blockIteratorNext. - blockIterCur int - - // donePreBlockLayoutPasses is true if all the passes before LayoutBlocks are called. - donePreBlockLayoutPasses bool - // doneBlockLayout is true if LayoutBlocks is called. - doneBlockLayout bool - // donePostBlockLayoutPasses is true if all the passes after LayoutBlocks are called. - donePostBlockLayoutPasses bool - - currentSourceOffset SourceOffset - - // zeros are the zero value constants for each type. - zeros [typeEnd]Value -} - -// ValueInfo contains the data per Value used to lower the SSA in backend. -type ValueInfo struct { - // RefCount is the reference count of the Value. - RefCount uint32 - alias Value -} - -// redundantParam is a pair of the index of the redundant parameter and the Value. -// This is used to eliminate the redundant parameters in the optimization pass. -type redundantParam struct { - // index is the index of the redundant parameter in the basicBlock. - index int - // uniqueValue is the Value which is passed to the redundant parameter. - uniqueValue Value -} - -// BasicBlock implements Builder.BasicBlock. -func (b *builder) BasicBlock(id BasicBlockID) BasicBlock { - return b.basicBlock(id) -} - -func (b *builder) basicBlock(id BasicBlockID) *basicBlock { - if id == basicBlockIDReturnBlock { - return b.returnBlk - } - return b.basicBlocksPool.View(int(id)) -} - -// InsertZeroValue implements Builder.InsertZeroValue. 
-func (b *builder) InsertZeroValue(t Type) { - if b.zeros[t].Valid() { - return - } - zeroInst := b.AllocateInstruction() - switch t { - case TypeI32: - zeroInst.AsIconst32(0) - case TypeI64: - zeroInst.AsIconst64(0) - case TypeF32: - zeroInst.AsF32const(0) - case TypeF64: - zeroInst.AsF64const(0) - case TypeV128: - zeroInst.AsVconst(0, 0) - default: - panic("TODO: " + t.String()) - } - b.zeros[t] = zeroInst.Insert(b).Return() -} - -func (b *builder) VarLengthPool() *wazevoapi.VarLengthPool[Value] { - return &b.varLengthPool -} - -// ReturnBlock implements Builder.ReturnBlock. -func (b *builder) ReturnBlock() BasicBlock { - return b.returnBlk -} - -// Init implements Builder.Reset. -func (b *builder) Init(s *Signature) { - b.nextVariable = 0 - b.currentSignature = s - b.zeros = [typeEnd]Value{ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid} - resetBasicBlock(b.returnBlk) - b.instructionsPool.Reset() - b.basicBlocksPool.Reset() - b.varLengthPool.Reset() - b.varLengthBasicBlockPool.Reset() - b.donePreBlockLayoutPasses = false - b.doneBlockLayout = false - b.donePostBlockLayoutPasses = false - for _, sig := range b.signatures { - sig.used = false - } - - b.redundantParams = b.redundantParams[:0] - b.blkStack = b.blkStack[:0] - b.blkStack2 = b.blkStack2[:0] - b.dominators = b.dominators[:0] - b.loopNestingForestRoots = b.loopNestingForestRoots[:0] - b.basicBlocksPool.Reset() - - for v := ValueID(0); v < b.nextValueID; v++ { - delete(b.valueAnnotations, v) - b.valuesInfo[v] = ValueInfo{alias: ValueInvalid} - } - b.nextValueID = 0 - b.reversePostOrderedBasicBlocks = b.reversePostOrderedBasicBlocks[:0] - b.doneBlockLayout = false - b.currentSourceOffset = sourceOffsetUnknown -} - -// Signature implements Builder.Signature. -func (b *builder) Signature() *Signature { - return b.currentSignature -} - -// AnnotateValue implements Builder.AnnotateValue. -func (b *builder) AnnotateValue(value Value, a string) { - b.valueAnnotations[value.ID()] = a -} - -// AllocateInstruction implements Builder.AllocateInstruction. -func (b *builder) AllocateInstruction() *Instruction { - instr := b.instructionsPool.Allocate() - instr.id = b.instructionsPool.Allocated() - return instr -} - -// DeclareSignature implements Builder.AnnotateValue. -func (b *builder) DeclareSignature(s *Signature) { - b.signatures[s.ID] = s - s.used = false -} - -// Signatures implements Builder.Signatures. -func (b *builder) Signatures() (ret []*Signature) { - for _, sig := range b.signatures { - ret = append(ret, sig) - } - sort.Slice(ret, func(i, j int) bool { - return ret[i].ID < ret[j].ID - }) - return -} - -// SetCurrentSourceOffset implements Builder.SetCurrentSourceOffset. -func (b *builder) SetCurrentSourceOffset(l SourceOffset) { - b.currentSourceOffset = l -} - -func (b *builder) usedSignatures() (ret []*Signature) { - for _, sig := range b.signatures { - if sig.used { - ret = append(ret, sig) - } - } - sort.Slice(ret, func(i, j int) bool { - return ret[i].ID < ret[j].ID - }) - return -} - -// ResolveSignature implements Builder.ResolveSignature. -func (b *builder) ResolveSignature(id SignatureID) *Signature { - return b.signatures[id] -} - -// AllocateBasicBlock implements Builder.AllocateBasicBlock. -func (b *builder) AllocateBasicBlock() BasicBlock { - return b.allocateBasicBlock() -} - -// allocateBasicBlock allocates a new basicBlock. 
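Signatures and usedSignatures collect map values and then sort by SignatureID because Go's map iteration order is randomized and compilation must be deterministic. The same pattern in isolation, with illustrative types:

package main

import (
	"fmt"
	"sort"
)

type signature struct {
	id  int
	str string
}

func main() {
	// Map iteration order is randomized, so collect and sort by ID before
	// emitting anything, as Signatures/usedSignatures do.
	byID := map[int]*signature{
		3: {id: 3, str: "(i32) -> i32"},
		1: {id: 1, str: "() -> ()"},
		2: {id: 2, str: "(i64, i64) -> i64"},
	}
	ret := make([]*signature, 0, len(byID))
	for _, sig := range byID {
		ret = append(ret, sig)
	}
	sort.Slice(ret, func(i, j int) bool { return ret[i].id < ret[j].id })
	for _, sig := range ret {
		fmt.Println(sig.id, sig.str)
	}
}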
-func (b *builder) allocateBasicBlock() *basicBlock { - id := BasicBlockID(b.basicBlocksPool.Allocated()) - blk := b.basicBlocksPool.Allocate() - blk.id = id - return blk -} - -// Idom implements Builder.Idom. -func (b *builder) Idom(blk BasicBlock) BasicBlock { - return b.dominators[blk.ID()] -} - -// InsertInstruction implements Builder.InsertInstruction. -func (b *builder) InsertInstruction(instr *Instruction) { - b.currentBB.insertInstruction(b, instr) - - if l := b.currentSourceOffset; l.Valid() { - // Emit the source offset info only when the instruction has side effect because - // these are the only instructions that are accessed by stack unwinding. - // This reduces the significant amount of the offset info in the binary. - if instr.sideEffect() != sideEffectNone { - instr.annotateSourceOffset(l) - } - } - - resultTypesFn := instructionReturnTypes[instr.opcode] - if resultTypesFn == nil { - panic("TODO: " + instr.Format(b)) - } - - t1, ts := resultTypesFn(b, instr) - if t1.invalid() { - return - } - - r1 := b.allocateValue(t1) - instr.rValue = r1.setInstructionID(instr.id) - - tsl := len(ts) - if tsl == 0 { - return - } - - rValues := b.varLengthPool.Allocate(tsl) - for i := 0; i < tsl; i++ { - rn := b.allocateValue(ts[i]) - rValues = rValues.Append(&b.varLengthPool, rn.setInstructionID(instr.id)) - } - instr.rValues = rValues -} - -// DefineVariable implements Builder.DefineVariable. -func (b *builder) DefineVariable(variable Variable, value Value, block BasicBlock) { - bb := block.(*basicBlock) - bb.lastDefinitions[variable] = value -} - -// DefineVariableInCurrentBB implements Builder.DefineVariableInCurrentBB. -func (b *builder) DefineVariableInCurrentBB(variable Variable, value Value) { - b.DefineVariable(variable, value, b.currentBB) -} - -// SetCurrentBlock implements Builder.SetCurrentBlock. -func (b *builder) SetCurrentBlock(bb BasicBlock) { - b.currentBB = bb.(*basicBlock) -} - -// CurrentBlock implements Builder.CurrentBlock. -func (b *builder) CurrentBlock() BasicBlock { - return b.currentBB -} - -// EntryBlock implements Builder.EntryBlock. -func (b *builder) EntryBlock() BasicBlock { - return b.entryBlk() -} - -// DeclareVariable implements Builder.DeclareVariable. -func (b *builder) DeclareVariable(typ Type) Variable { - v := b.nextVariable - b.nextVariable++ - return v.setType(typ) -} - -// allocateValue implements Builder.AllocateValue. -func (b *builder) allocateValue(typ Type) (v Value) { - v = Value(b.nextValueID) - v = v.setType(typ) - b.nextValueID++ - return -} - -// FindValueInLinearPath implements Builder.FindValueInLinearPath. -func (b *builder) FindValueInLinearPath(variable Variable) Value { - return b.findValueInLinearPath(variable, b.currentBB) -} - -func (b *builder) findValueInLinearPath(variable Variable, blk *basicBlock) Value { - if val, ok := blk.lastDefinitions[variable]; ok { - return val - } else if !blk.sealed { - return ValueInvalid - } - - if pred := blk.singlePred; pred != nil { - // If this block is sealed and have only one predecessor, - // we can use the value in that block without ambiguity on definition. - return b.findValueInLinearPath(variable, pred) - } - if len(blk.preds) == 1 { - panic("BUG") - } - return ValueInvalid -} - -// MustFindValue implements Builder.MustFindValue. -func (b *builder) MustFindValue(variable Variable) Value { - return b.findValue(variable.getType(), variable, b.currentBB) -} - -// findValue recursively tries to find the latest definition of a `variable`. 
The algorithm is described in -// the section 2 of the paper https://link.springer.com/content/pdf/10.1007/978-3-642-37051-9_6.pdf. -// -// TODO: reimplement this in iterative, not recursive, to avoid stack overflow. -func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value { - if val, ok := blk.lastDefinitions[variable]; ok { - // The value is already defined in this block! - return val - } else if !blk.sealed { // Incomplete CFG as in the paper. - // If this is not sealed, that means it might have additional unknown predecessor later on. - // So we temporarily define the placeholder value here (not add as a parameter yet!), - // and record it as unknown. - // The unknown values are resolved when we call seal this block via BasicBlock.Seal(). - value := b.allocateValue(typ) - if wazevoapi.SSALoggingEnabled { - fmt.Printf("adding unknown value placeholder for %s at %d\n", variable, blk.id) - } - blk.lastDefinitions[variable] = value - blk.unknownValues = append(blk.unknownValues, unknownValue{ - variable: variable, - value: value, - }) - return value - } else if blk.EntryBlock() { - // If this is the entry block, we reach the uninitialized variable which has zero value. - return b.zeros[variable.getType()] - } - - if pred := blk.singlePred; pred != nil { - // If this block is sealed and have only one predecessor, - // we can use the value in that block without ambiguity on definition. - return b.findValue(typ, variable, pred) - } else if len(blk.preds) == 0 { - panic("BUG: value is not defined for " + variable.String()) - } - - // If this block has multiple predecessors, we have to gather the definitions, - // and treat them as an argument to this block. - // - // But before that, we have to check if the possible definitions are the same Value. - tmpValue := b.allocateValue(typ) - // Break the cycle by defining the variable with the tmpValue. - b.DefineVariable(variable, tmpValue, blk) - // Check all the predecessors if they have the same definition. - uniqueValue := ValueInvalid - for i := range blk.preds { - predValue := b.findValue(typ, variable, blk.preds[i].blk) - if uniqueValue == ValueInvalid { - uniqueValue = predValue - } else if uniqueValue != predValue { - uniqueValue = ValueInvalid - break - } - } - - if uniqueValue != ValueInvalid { - // If all the predecessors have the same definition, we can use that value. - b.alias(tmpValue, uniqueValue) - return uniqueValue - } else { - // Otherwise, add the tmpValue to this block as a parameter which may or may not be redundant, but - // later we eliminate trivial params in an optimization pass. This must be done before finding the - // definitions in the predecessors so that we can break the cycle. - blk.addParamOn(b, tmpValue) - // After the new param is added, we have to manipulate the original branching instructions - // in predecessors so that they would pass the definition of `variable` as the argument to - // the newly added PHI. - for i := range blk.preds { - pred := &blk.preds[i] - value := b.findValue(typ, variable, pred.blk) - pred.branch.addArgumentBranchInst(b, value) - } - return tmpValue - } -} - -// Seal implements Builder.Seal. 
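In the multi-predecessor case, findValue boils down to one decision: if every predecessor yields the same definition, alias to it; otherwise introduce a block parameter (the block-argument form of a PHI). A heavily simplified sketch of just that decision, with illustrative names and plain ints standing in for Value:

package main

import "fmt"

const valueInvalid = -1

// uniqueOrParam mirrors the decision at the end of findValue: if every
// predecessor supplies the same definition, reuse (alias to) it; otherwise a
// new block parameter must be introduced.
func uniqueOrParam(predValues []int) (unique int, needsParam bool) {
	unique = valueInvalid
	for _, v := range predValues {
		if unique == valueInvalid {
			unique = v
		} else if unique != v {
			return valueInvalid, true
		}
	}
	return unique, false
}

func main() {
	fmt.Println(uniqueOrParam([]int{3, 3, 3})) // 3 false: all preds agree
	fmt.Println(uniqueOrParam([]int{3, 7}))    // -1 true: a param (PHI) is needed
}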
-func (b *builder) Seal(raw BasicBlock) { - blk := raw.(*basicBlock) - if len(blk.preds) == 1 { - blk.singlePred = blk.preds[0].blk - } - blk.sealed = true - - for _, v := range blk.unknownValues { - variable, phiValue := v.variable, v.value - typ := variable.getType() - blk.addParamOn(b, phiValue) - for i := range blk.preds { - pred := &blk.preds[i] - predValue := b.findValue(typ, variable, pred.blk) - if !predValue.Valid() { - panic("BUG: value is not defined anywhere in the predecessors in the CFG") - } - pred.branch.addArgumentBranchInst(b, predValue) - } - } -} - -// Format implements Builder.Format. -func (b *builder) Format() string { - str := strings.Builder{} - usedSigs := b.usedSignatures() - if len(usedSigs) > 0 { - str.WriteByte('\n') - str.WriteString("signatures:\n") - for _, sig := range usedSigs { - str.WriteByte('\t') - str.WriteString(sig.String()) - str.WriteByte('\n') - } - } - - var iterBegin, iterNext func() *basicBlock - if b.doneBlockLayout { - iterBegin, iterNext = b.blockIteratorReversePostOrderBegin, b.blockIteratorReversePostOrderNext - } else { - iterBegin, iterNext = b.blockIteratorBegin, b.blockIteratorNext - } - for bb := iterBegin(); bb != nil; bb = iterNext() { - str.WriteByte('\n') - str.WriteString(bb.formatHeader(b)) - str.WriteByte('\n') - - for cur := bb.Root(); cur != nil; cur = cur.Next() { - str.WriteByte('\t') - str.WriteString(cur.Format(b)) - str.WriteByte('\n') - } - } - return str.String() -} - -// BlockIteratorNext implements Builder.BlockIteratorNext. -func (b *builder) BlockIteratorNext() BasicBlock { - if blk := b.blockIteratorNext(); blk == nil { - return nil // BasicBlock((*basicBlock)(nil)) != BasicBlock(nil) - } else { - return blk - } -} - -// BlockIteratorNext implements Builder.BlockIteratorNext. -func (b *builder) blockIteratorNext() *basicBlock { - index := b.blockIterCur - for { - if index == b.basicBlocksPool.Allocated() { - return nil - } - ret := b.basicBlocksPool.View(index) - index++ - if !ret.invalid { - b.blockIterCur = index - return ret - } - } -} - -// BlockIteratorBegin implements Builder.BlockIteratorBegin. -func (b *builder) BlockIteratorBegin() BasicBlock { - return b.blockIteratorBegin() -} - -// BlockIteratorBegin implements Builder.BlockIteratorBegin. -func (b *builder) blockIteratorBegin() *basicBlock { - b.blockIterCur = 0 - return b.blockIteratorNext() -} - -// BlockIteratorReversePostOrderBegin implements Builder.BlockIteratorReversePostOrderBegin. -func (b *builder) BlockIteratorReversePostOrderBegin() BasicBlock { - return b.blockIteratorReversePostOrderBegin() -} - -// BlockIteratorBegin implements Builder.BlockIteratorBegin. -func (b *builder) blockIteratorReversePostOrderBegin() *basicBlock { - b.blockIterCur = 0 - return b.blockIteratorReversePostOrderNext() -} - -// BlockIteratorReversePostOrderNext implements Builder.BlockIteratorReversePostOrderNext. -func (b *builder) BlockIteratorReversePostOrderNext() BasicBlock { - if blk := b.blockIteratorReversePostOrderNext(); blk == nil { - return nil // BasicBlock((*basicBlock)(nil)) != BasicBlock(nil) - } else { - return blk - } -} - -// BlockIteratorNext implements Builder.BlockIteratorNext. -func (b *builder) blockIteratorReversePostOrderNext() *basicBlock { - if b.blockIterCur >= len(b.reversePostOrderedBasicBlocks) { - return nil - } else { - ret := b.reversePostOrderedBasicBlocks[b.blockIterCur] - b.blockIterCur++ - return ret - } -} - -// ValuesInfo implements Builder.ValuesInfo. 
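blockIteratorBegin and blockIteratorNext implement a cursor that skips blocks invalidated by optimization, which is what the `for blk := BlockIteratorBegin(); blk != nil; ...` loop documented on Builder relies on. A standalone sketch of the same iterator shape:

package main

import "fmt"

// iter is a minimal cursor-style iterator that skips entries marked invalid,
// the same shape as blockIteratorBegin/blockIteratorNext.
type iter struct {
	items   []string
	invalid map[int]bool
	cur     int
}

func (it *iter) begin() (string, bool) { it.cur = 0; return it.next() }

func (it *iter) next() (string, bool) {
	for it.cur < len(it.items) {
		i := it.cur
		it.cur++
		if !it.invalid[i] {
			return it.items[i], true
		}
	}
	return "", false
}

func main() {
	it := &iter{items: []string{"blk0", "blk1", "blk2"}, invalid: map[int]bool{1: true}}
	for blk, ok := it.begin(); ok; blk, ok = it.next() {
		fmt.Println(blk) // blk0, blk2
	}
}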
-func (b *builder) ValuesInfo() []ValueInfo { - return b.valuesInfo -} - -// alias records the alias of the given values. The alias(es) will be -// eliminated in the optimization pass via resolveArgumentAlias. -func (b *builder) alias(dst, src Value) { - did := int(dst.ID()) - if did >= len(b.valuesInfo) { - l := did + 1 - len(b.valuesInfo) - b.valuesInfo = append(b.valuesInfo, make([]ValueInfo, l)...) - view := b.valuesInfo[len(b.valuesInfo)-l:] - for i := range view { - view[i].alias = ValueInvalid - } - } - b.valuesInfo[did].alias = src -} - -// resolveArgumentAlias resolves the alias of the arguments of the given instruction. -func (b *builder) resolveArgumentAlias(instr *Instruction) { - if instr.v.Valid() { - instr.v = b.resolveAlias(instr.v) - } - - if instr.v2.Valid() { - instr.v2 = b.resolveAlias(instr.v2) - } - - if instr.v3.Valid() { - instr.v3 = b.resolveAlias(instr.v3) - } - - view := instr.vs.View() - for i, v := range view { - view[i] = b.resolveAlias(v) - } -} - -// resolveAlias resolves the alias of the given value. -func (b *builder) resolveAlias(v Value) Value { - info := b.valuesInfo - l := ValueID(len(info)) - // Some aliases are chained, so we need to resolve them recursively. - for { - vid := v.ID() - if vid < l && info[vid].alias.Valid() { - v = info[vid].alias - } else { - break - } - } - return v -} - -// entryBlk returns the entry block of the function. -func (b *builder) entryBlk() *basicBlock { - return b.basicBlocksPool.View(0) -} - -// isDominatedBy returns true if the given block `n` is dominated by the given block `d`. -// Before calling this, the builder must pass by passCalculateImmediateDominators. -func (b *builder) isDominatedBy(n *basicBlock, d *basicBlock) bool { - if len(b.dominators) == 0 { - panic("BUG: passCalculateImmediateDominators must be called before calling isDominatedBy") - } - ent := b.entryBlk() - doms := b.dominators - for n != d && n != ent { - n = doms[n.id] - } - return n == d -} - -// BlockIDMax implements Builder.BlockIDMax. -func (b *builder) BlockIDMax() BasicBlockID { - return BasicBlockID(b.basicBlocksPool.Allocated()) -} - -// InsertUndefined implements Builder.InsertUndefined. -func (b *builder) InsertUndefined() { - instr := b.AllocateInstruction() - instr.opcode = OpcodeUndefined - b.InsertInstruction(instr) -} - -// LoopNestingForestRoots implements Builder.LoopNestingForestRoots. -func (b *builder) LoopNestingForestRoots() []BasicBlock { - return b.loopNestingForestRoots -} - -// LowestCommonAncestor implements Builder.LowestCommonAncestor. -func (b *builder) LowestCommonAncestor(blk1, blk2 BasicBlock) BasicBlock { - return b.sparseTree.findLCA(blk1.ID(), blk2.ID()) -} - -// InstructionOfValue returns the instruction that produces the given Value, or nil -// if the Value is not produced by any instruction. -func (b *builder) InstructionOfValue(v Value) *Instruction { - instrID := v.instructionID() - if instrID <= 0 { - return nil - } - return b.instructionsPool.View(instrID - 1) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/cmp.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/cmp.go deleted file mode 100644 index 15b62ca8e..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/cmp.go +++ /dev/null @@ -1,107 +0,0 @@ -package ssa - -// IntegerCmpCond represents a condition for integer comparison. -type IntegerCmpCond byte - -const ( - // IntegerCmpCondInvalid represents an invalid condition. 
- IntegerCmpCondInvalid IntegerCmpCond = iota - // IntegerCmpCondEqual represents "==". - IntegerCmpCondEqual - // IntegerCmpCondNotEqual represents "!=". - IntegerCmpCondNotEqual - // IntegerCmpCondSignedLessThan represents Signed "<". - IntegerCmpCondSignedLessThan - // IntegerCmpCondSignedGreaterThanOrEqual represents Signed ">=". - IntegerCmpCondSignedGreaterThanOrEqual - // IntegerCmpCondSignedGreaterThan represents Signed ">". - IntegerCmpCondSignedGreaterThan - // IntegerCmpCondSignedLessThanOrEqual represents Signed "<=". - IntegerCmpCondSignedLessThanOrEqual - // IntegerCmpCondUnsignedLessThan represents Unsigned "<". - IntegerCmpCondUnsignedLessThan - // IntegerCmpCondUnsignedGreaterThanOrEqual represents Unsigned ">=". - IntegerCmpCondUnsignedGreaterThanOrEqual - // IntegerCmpCondUnsignedGreaterThan represents Unsigned ">". - IntegerCmpCondUnsignedGreaterThan - // IntegerCmpCondUnsignedLessThanOrEqual represents Unsigned "<=". - IntegerCmpCondUnsignedLessThanOrEqual -) - -// String implements fmt.Stringer. -func (i IntegerCmpCond) String() string { - switch i { - case IntegerCmpCondEqual: - return "eq" - case IntegerCmpCondNotEqual: - return "neq" - case IntegerCmpCondSignedLessThan: - return "lt_s" - case IntegerCmpCondSignedGreaterThanOrEqual: - return "ge_s" - case IntegerCmpCondSignedGreaterThan: - return "gt_s" - case IntegerCmpCondSignedLessThanOrEqual: - return "le_s" - case IntegerCmpCondUnsignedLessThan: - return "lt_u" - case IntegerCmpCondUnsignedGreaterThanOrEqual: - return "ge_u" - case IntegerCmpCondUnsignedGreaterThan: - return "gt_u" - case IntegerCmpCondUnsignedLessThanOrEqual: - return "le_u" - default: - panic("invalid integer comparison condition") - } -} - -// Signed returns true if the condition is signed integer comparison. -func (i IntegerCmpCond) Signed() bool { - switch i { - case IntegerCmpCondSignedLessThan, IntegerCmpCondSignedGreaterThanOrEqual, - IntegerCmpCondSignedGreaterThan, IntegerCmpCondSignedLessThanOrEqual: - return true - default: - return false - } -} - -type FloatCmpCond byte - -const ( - // FloatCmpCondInvalid represents an invalid condition. - FloatCmpCondInvalid FloatCmpCond = iota - // FloatCmpCondEqual represents "==". - FloatCmpCondEqual - // FloatCmpCondNotEqual represents "!=". - FloatCmpCondNotEqual - // FloatCmpCondLessThan represents "<". - FloatCmpCondLessThan - // FloatCmpCondLessThanOrEqual represents "<=". - FloatCmpCondLessThanOrEqual - // FloatCmpCondGreaterThan represents ">". - FloatCmpCondGreaterThan - // FloatCmpCondGreaterThanOrEqual represents ">=". - FloatCmpCondGreaterThanOrEqual -) - -// String implements fmt.Stringer. -func (f FloatCmpCond) String() string { - switch f { - case FloatCmpCondEqual: - return "eq" - case FloatCmpCondNotEqual: - return "neq" - case FloatCmpCondLessThan: - return "lt" - case FloatCmpCondLessThanOrEqual: - return "le" - case FloatCmpCondGreaterThan: - return "gt" - case FloatCmpCondGreaterThanOrEqual: - return "ge" - default: - panic("invalid float comparison condition") - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/funcref.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/funcref.go deleted file mode 100644 index d9620762a..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/funcref.go +++ /dev/null @@ -1,12 +0,0 @@ -package ssa - -import "fmt" - -// FuncRef is a unique identifier for a function of the frontend, -// and is used to reference the function in function call. 
-type FuncRef uint32 - -// String implements fmt.Stringer. -func (r FuncRef) String() string { - return fmt.Sprintf("f%d", r) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go deleted file mode 100644 index 9a3d1da6e..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go +++ /dev/null @@ -1,2976 +0,0 @@ -package ssa - -import ( - "fmt" - "math" - "strings" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -// Opcode represents a SSA instruction. -type Opcode uint32 - -// Instruction represents an instruction whose opcode is specified by -// Opcode. Since Go doesn't have union type, we use this flattened type -// for all instructions, and therefore each field has different meaning -// depending on Opcode. -type Instruction struct { - // id is the unique ID of this instruction which ascends from 0 following the order of program. - id int - opcode Opcode - u1, u2 uint64 - v Value - v2 Value - v3 Value - vs Values - typ Type - prev, next *Instruction - - // rValue is the (first) return value of this instruction. - // For branching instructions except for OpcodeBrTable, they hold BlockID to jump cast to Value. - rValue Value - // rValues are the rest of the return values of this instruction. - // For OpcodeBrTable, it holds the list of BlockID to jump cast to Value. - rValues Values - gid InstructionGroupID - sourceOffset SourceOffset - live bool - alreadyLowered bool -} - -// SourceOffset represents the offset of the source of an instruction. -type SourceOffset int64 - -const sourceOffsetUnknown = -1 - -// Valid returns true if this source offset is valid. -func (l SourceOffset) Valid() bool { - return l != sourceOffsetUnknown -} - -func (i *Instruction) annotateSourceOffset(line SourceOffset) { - i.sourceOffset = line -} - -// SourceOffset returns the source offset of this instruction. -func (i *Instruction) SourceOffset() SourceOffset { - return i.sourceOffset -} - -// Opcode returns the opcode of this instruction. -func (i *Instruction) Opcode() Opcode { - return i.opcode -} - -// GroupID returns the InstructionGroupID of this instruction. -func (i *Instruction) GroupID() InstructionGroupID { - return i.gid -} - -// MarkLowered marks this instruction as already lowered. -func (i *Instruction) MarkLowered() { - i.alreadyLowered = true -} - -// Lowered returns true if this instruction is already lowered. -func (i *Instruction) Lowered() bool { - return i.alreadyLowered -} - -// resetInstruction resets this instruction to the initial state. -func resetInstruction(i *Instruction) { - *i = Instruction{} - i.v = ValueInvalid - i.v2 = ValueInvalid - i.v3 = ValueInvalid - i.rValue = ValueInvalid - i.typ = typeInvalid - i.vs = ValuesNil - i.sourceOffset = sourceOffsetUnknown -} - -// InstructionGroupID is assigned to each instruction and represents a group of instructions -// where each instruction is interchangeable with others except for the last instruction -// in the group which has side effects. In short, InstructionGroupID is determined by the side effects of instructions. -// That means, if there's an instruction with side effect between two instructions, then these two instructions -// will have different instructionGroupID. Note that each block always ends with branching, which is with side effects, -// therefore, instructions in different blocks always have different InstructionGroupID(s). 
-// -// The notable application of this is used in lowering SSA-level instruction to a ISA specific instruction, -// where we eagerly try to merge multiple instructions into single operation etc. Such merging cannot be done -// if these instruction have different InstructionGroupID since it will change the semantics of a program. -// -// See passDeadCodeElimination. -type InstructionGroupID uint32 - -// Returns Value(s) produced by this instruction if any. -// The `first` is the first return value, and `rest` is the rest of the values. -func (i *Instruction) Returns() (first Value, rest []Value) { - if i.IsBranching() { - return ValueInvalid, nil - } - return i.rValue, i.rValues.View() -} - -// Return returns a Value(s) produced by this instruction if any. -// If there's multiple return values, only the first one is returned. -func (i *Instruction) Return() (first Value) { - return i.rValue -} - -// Args returns the arguments to this instruction. -func (i *Instruction) Args() (v1, v2, v3 Value, vs []Value) { - return i.v, i.v2, i.v3, i.vs.View() -} - -// Arg returns the first argument to this instruction. -func (i *Instruction) Arg() Value { - return i.v -} - -// Arg2 returns the first two arguments to this instruction. -func (i *Instruction) Arg2() (Value, Value) { - return i.v, i.v2 -} - -// ArgWithLane returns the first argument to this instruction, and the lane type. -func (i *Instruction) ArgWithLane() (Value, VecLane) { - return i.v, VecLane(i.u1) -} - -// Arg2WithLane returns the first two arguments to this instruction, and the lane type. -func (i *Instruction) Arg2WithLane() (Value, Value, VecLane) { - return i.v, i.v2, VecLane(i.u1) -} - -// ShuffleData returns the first two arguments to this instruction and 2 uint64s `lo`, `hi`. -// -// Note: Each uint64 encodes a sequence of 8 bytes where each byte encodes a VecLane, -// so that the 128bit integer `hi<<64|lo` packs a slice `[16]VecLane`, -// where `lane[0]` is the least significant byte, and `lane[n]` is shifted to offset `n*8`. -func (i *Instruction) ShuffleData() (v Value, v2 Value, lo uint64, hi uint64) { - return i.v, i.v2, i.u1, i.u2 -} - -// Arg3 returns the first three arguments to this instruction. -func (i *Instruction) Arg3() (Value, Value, Value) { - return i.v, i.v2, i.v3 -} - -// Next returns the next instruction laid out next to itself. -func (i *Instruction) Next() *Instruction { - return i.next -} - -// Prev returns the previous instruction laid out prior to itself. -func (i *Instruction) Prev() *Instruction { - return i.prev -} - -// IsBranching returns true if this instruction is a branching instruction. -func (i *Instruction) IsBranching() bool { - switch i.opcode { - case OpcodeJump, OpcodeBrz, OpcodeBrnz, OpcodeBrTable: - return true - default: - return false - } -} - -// TODO: complete opcode comments. -const ( - OpcodeInvalid Opcode = iota - - // OpcodeUndefined is a placeholder for undefined opcode. This can be used for debugging to intentionally - // cause a crash at certain point. - OpcodeUndefined - - // OpcodeJump takes the list of args to the `block` and unconditionally jumps to it. - OpcodeJump - - // OpcodeBrz branches into `blk` with `args` if the value `c` equals zero: `Brz c, blk, args`. - OpcodeBrz - - // OpcodeBrnz branches into `blk` with `args` if the value `c` is not zero: `Brnz c, blk, args`. - OpcodeBrnz - - // OpcodeBrTable takes the index value `index`, and branches into `labelX`. If the `index` is out of range, - // it branches into the last labelN: `BrTable index, [label1, label2, ... 
labelN]`. - OpcodeBrTable - - // OpcodeExitWithCode exit the execution immediately. - OpcodeExitWithCode - - // OpcodeExitIfTrueWithCode exits the execution immediately if the value `c` is not zero. - OpcodeExitIfTrueWithCode - - // OpcodeReturn returns from the function: `return rvalues`. - OpcodeReturn - - // OpcodeCall calls a function specified by the symbol FN with arguments `args`: `returnvals = Call FN, args...` - // This is a "near" call, which means the call target is known at compile time, and the target is relatively close - // to this function. If the target cannot be reached by near call, the backend fails to compile. - OpcodeCall - - // OpcodeCallIndirect calls a function specified by `callee` which is a function address: `returnvals = call_indirect SIG, callee, args`. - // Note that this is different from call_indirect in Wasm, which also does type checking, etc. - OpcodeCallIndirect - - // OpcodeSplat performs a vector splat operation: `v = Splat.lane x`. - OpcodeSplat - - // OpcodeSwizzle performs a vector swizzle operation: `v = Swizzle.lane x, y`. - OpcodeSwizzle - - // OpcodeInsertlane inserts a lane value into a vector: `v = InsertLane x, y, Idx`. - OpcodeInsertlane - - // OpcodeExtractlane extracts a lane value from a vector: `v = ExtractLane x, Idx`. - OpcodeExtractlane - - // OpcodeLoad loads a Type value from the [base + offset] address: `v = Load base, offset`. - OpcodeLoad - - // OpcodeStore stores a Type value to the [base + offset] address: `Store v, base, offset`. - OpcodeStore - - // OpcodeUload8 loads the 8-bit value from the [base + offset] address, zero-extended to 64 bits: `v = Uload8 base, offset`. - OpcodeUload8 - - // OpcodeSload8 loads the 8-bit value from the [base + offset] address, sign-extended to 64 bits: `v = Sload8 base, offset`. - OpcodeSload8 - - // OpcodeIstore8 stores the 8-bit value to the [base + offset] address, sign-extended to 64 bits: `Istore8 v, base, offset`. - OpcodeIstore8 - - // OpcodeUload16 loads the 16-bit value from the [base + offset] address, zero-extended to 64 bits: `v = Uload16 base, offset`. - OpcodeUload16 - - // OpcodeSload16 loads the 16-bit value from the [base + offset] address, sign-extended to 64 bits: `v = Sload16 base, offset`. - OpcodeSload16 - - // OpcodeIstore16 stores the 16-bit value to the [base + offset] address, zero-extended to 64 bits: `Istore16 v, base, offset`. - OpcodeIstore16 - - // OpcodeUload32 loads the 32-bit value from the [base + offset] address, zero-extended to 64 bits: `v = Uload32 base, offset`. - OpcodeUload32 - - // OpcodeSload32 loads the 32-bit value from the [base + offset] address, sign-extended to 64 bits: `v = Sload32 base, offset`. - OpcodeSload32 - - // OpcodeIstore32 stores the 32-bit value to the [base + offset] address, zero-extended to 64 bits: `Istore16 v, base, offset`. - OpcodeIstore32 - - // OpcodeLoadSplat represents a load that replicates the loaded value to all lanes `v = LoadSplat.lane p, Offset`. - OpcodeLoadSplat - - // OpcodeVZeroExtLoad loads a scalar single/double precision floating point value from the [p + Offset] address, - // and zero-extend it to the V128 value: `v = VExtLoad p, Offset`. - OpcodeVZeroExtLoad - - // OpcodeIconst represents the integer const. - OpcodeIconst - - // OpcodeF32const represents the single-precision const. - OpcodeF32const - - // OpcodeF64const represents the double-precision const. - OpcodeF64const - - // OpcodeVconst represents the 128bit vector const. 
- OpcodeVconst - - // OpcodeVbor computes binary or between two 128bit vectors: `v = bor x, y`. - OpcodeVbor - - // OpcodeVbxor computes binary xor between two 128bit vectors: `v = bxor x, y`. - OpcodeVbxor - - // OpcodeVband computes binary and between two 128bit vectors: `v = band x, y`. - OpcodeVband - - // OpcodeVbandnot computes binary and-not between two 128bit vectors: `v = bandnot x, y`. - OpcodeVbandnot - - // OpcodeVbnot negates a 128bit vector: `v = bnot x`. - OpcodeVbnot - - // OpcodeVbitselect uses the bits in the control mask c to select the corresponding bit from x when 1 - // and y when 0: `v = bitselect c, x, y`. - OpcodeVbitselect - - // OpcodeShuffle shuffles two vectors using the given 128-bit immediate: `v = shuffle imm, x, y`. - // For each byte in the immediate, a value i in [0, 15] selects the i-th byte in vector x; - // i in [16, 31] selects the (i-16)-th byte in vector y. - OpcodeShuffle - - // OpcodeSelect chooses between two values based on a condition `c`: `v = Select c, x, y`. - OpcodeSelect - - // OpcodeVanyTrue performs a any true operation: `s = VanyTrue a`. - OpcodeVanyTrue - - // OpcodeVallTrue performs a lane-wise all true operation: `s = VallTrue.lane a`. - OpcodeVallTrue - - // OpcodeVhighBits performs a lane-wise extract of the high bits: `v = VhighBits.lane a`. - OpcodeVhighBits - - // OpcodeIcmp compares two integer values with the given condition: `v = icmp Cond, x, y`. - OpcodeIcmp - - // OpcodeVIcmp compares two integer values with the given condition: `v = vicmp Cond, x, y` on vector. - OpcodeVIcmp - - // OpcodeIcmpImm compares an integer value with the immediate value on the given condition: `v = icmp_imm Cond, x, Y`. - OpcodeIcmpImm - - // OpcodeIadd performs an integer addition: `v = Iadd x, y`. - OpcodeIadd - - // OpcodeVIadd performs an integer addition: `v = VIadd.lane x, y` on vector. - OpcodeVIadd - - // OpcodeVSaddSat performs a signed saturating vector addition: `v = VSaddSat.lane x, y` on vector. - OpcodeVSaddSat - - // OpcodeVUaddSat performs an unsigned saturating vector addition: `v = VUaddSat.lane x, y` on vector. - OpcodeVUaddSat - - // OpcodeIsub performs an integer subtraction: `v = Isub x, y`. - OpcodeIsub - - // OpcodeVIsub performs an integer subtraction: `v = VIsub.lane x, y` on vector. - OpcodeVIsub - - // OpcodeVSsubSat performs a signed saturating vector subtraction: `v = VSsubSat.lane x, y` on vector. - OpcodeVSsubSat - - // OpcodeVUsubSat performs an unsigned saturating vector subtraction: `v = VUsubSat.lane x, y` on vector. - OpcodeVUsubSat - - // OpcodeVImin performs a signed integer min: `v = VImin.lane x, y` on vector. - OpcodeVImin - - // OpcodeVUmin performs an unsigned integer min: `v = VUmin.lane x, y` on vector. - OpcodeVUmin - - // OpcodeVImax performs a signed integer max: `v = VImax.lane x, y` on vector. - OpcodeVImax - - // OpcodeVUmax performs an unsigned integer max: `v = VUmax.lane x, y` on vector. - OpcodeVUmax - - // OpcodeVAvgRound performs an unsigned integer avg, truncating to zero: `v = VAvgRound.lane x, y` on vector. - OpcodeVAvgRound - - // OpcodeVImul performs an integer multiplication: `v = VImul.lane x, y` on vector. - OpcodeVImul - - // OpcodeVIneg negates the given integer vector value: `v = VIneg x`. - OpcodeVIneg - - // OpcodeVIpopcnt counts the number of 1-bits in the given vector: `v = VIpopcnt x`. - OpcodeVIpopcnt - - // OpcodeVIabs returns the absolute value for the given vector value: `v = VIabs.lane x`. 
- OpcodeVIabs - - // OpcodeVIshl shifts x left by (y mod lane-width): `v = VIshl.lane x, y` on vector. - OpcodeVIshl - - // OpcodeVUshr shifts x right by (y mod lane-width), unsigned: `v = VUshr.lane x, y` on vector. - OpcodeVUshr - - // OpcodeVSshr shifts x right by (y mod lane-width), signed: `v = VSshr.lane x, y` on vector. - OpcodeVSshr - - // OpcodeVFabs takes the absolute value of a floating point value: `v = VFabs.lane x on vector. - OpcodeVFabs - - // OpcodeVFmax takes the maximum of two floating point values: `v = VFmax.lane x, y on vector. - OpcodeVFmax - - // OpcodeVFmin takes the minimum of two floating point values: `v = VFmin.lane x, y on vector. - OpcodeVFmin - - // OpcodeVFneg negates the given floating point vector value: `v = VFneg x`. - OpcodeVFneg - - // OpcodeVFadd performs a floating point addition: `v = VFadd.lane x, y` on vector. - OpcodeVFadd - - // OpcodeVFsub performs a floating point subtraction: `v = VFsub.lane x, y` on vector. - OpcodeVFsub - - // OpcodeVFmul performs a floating point multiplication: `v = VFmul.lane x, y` on vector. - OpcodeVFmul - - // OpcodeVFdiv performs a floating point division: `v = VFdiv.lane x, y` on vector. - OpcodeVFdiv - - // OpcodeVFcmp compares two float values with the given condition: `v = VFcmp.lane Cond, x, y` on float. - OpcodeVFcmp - - // OpcodeVCeil takes the ceiling of the given floating point value: `v = ceil.lane x` on vector. - OpcodeVCeil - - // OpcodeVFloor takes the floor of the given floating point value: `v = floor.lane x` on vector. - OpcodeVFloor - - // OpcodeVTrunc takes the truncation of the given floating point value: `v = trunc.lane x` on vector. - OpcodeVTrunc - - // OpcodeVNearest takes the nearest integer of the given floating point value: `v = nearest.lane x` on vector. - OpcodeVNearest - - // OpcodeVMaxPseudo computes the lane-wise maximum value `v = VMaxPseudo.lane x, y` on vector defined as `x < y ? x : y`. - OpcodeVMaxPseudo - - // OpcodeVMinPseudo computes the lane-wise minimum value `v = VMinPseudo.lane x, y` on vector defined as `y < x ? x : y`. - OpcodeVMinPseudo - - // OpcodeVSqrt takes the minimum of two floating point values: `v = VFmin.lane x, y` on vector. - OpcodeVSqrt - - // OpcodeVFcvtToUintSat converts a floating point value to an unsigned integer: `v = FcvtToUintSat.lane x` on vector. - OpcodeVFcvtToUintSat - - // OpcodeVFcvtToSintSat converts a floating point value to a signed integer: `v = VFcvtToSintSat.lane x` on vector. - OpcodeVFcvtToSintSat - - // OpcodeVFcvtFromUint converts a floating point value from an unsigned integer: `v = FcvtFromUint.lane x` on vector. - // x is always a 32-bit integer lane, and the result is either a 32-bit or 64-bit floating point-sized vector. - OpcodeVFcvtFromUint - - // OpcodeVFcvtFromSint converts a floating point value from a signed integer: `v = VFcvtFromSint.lane x` on vector. - // x is always a 32-bit integer lane, and the result is either a 32-bit or 64-bit floating point-sized vector. - OpcodeVFcvtFromSint - - // OpcodeImul performs an integer multiplication: `v = Imul x, y`. - OpcodeImul - - // OpcodeUdiv performs the unsigned integer division `v = Udiv x, y`. - OpcodeUdiv - - // OpcodeSdiv performs the signed integer division `v = Sdiv x, y`. - OpcodeSdiv - - // OpcodeUrem computes the remainder of the unsigned integer division `v = Urem x, y`. - OpcodeUrem - - // OpcodeSrem computes the remainder of the signed integer division `v = Srem x, y`. - OpcodeSrem - - // OpcodeBand performs a binary and: `v = Band x, y`. 
- OpcodeBand - - // OpcodeBor performs a binary or: `v = Bor x, y`. - OpcodeBor - - // OpcodeBxor performs a binary xor: `v = Bxor x, y`. - OpcodeBxor - - // OpcodeBnot performs a binary not: `v = Bnot x`. - OpcodeBnot - - // OpcodeRotl rotates the given integer value to the left: `v = Rotl x, y`. - OpcodeRotl - - // OpcodeRotr rotates the given integer value to the right: `v = Rotr x, y`. - OpcodeRotr - - // OpcodeIshl does logical shift left: `v = Ishl x, y`. - OpcodeIshl - - // OpcodeUshr does logical shift right: `v = Ushr x, y`. - OpcodeUshr - - // OpcodeSshr does arithmetic shift right: `v = Sshr x, y`. - OpcodeSshr - - // OpcodeClz counts the number of leading zeros: `v = clz x`. - OpcodeClz - - // OpcodeCtz counts the number of trailing zeros: `v = ctz x`. - OpcodeCtz - - // OpcodePopcnt counts the number of 1-bits: `v = popcnt x`. - OpcodePopcnt - - // OpcodeFcmp compares two floating point values: `v = fcmp Cond, x, y`. - OpcodeFcmp - - // OpcodeFadd performs a floating point addition: / `v = Fadd x, y`. - OpcodeFadd - - // OpcodeFsub performs a floating point subtraction: `v = Fsub x, y`. - OpcodeFsub - - // OpcodeFmul performs a floating point multiplication: `v = Fmul x, y`. - OpcodeFmul - - // OpcodeSqmulRoundSat performs a lane-wise saturating rounding multiplication - // in Q15 format: `v = SqmulRoundSat.lane x,y` on vector. - OpcodeSqmulRoundSat - - // OpcodeFdiv performs a floating point division: `v = Fdiv x, y`. - OpcodeFdiv - - // OpcodeSqrt takes the square root of the given floating point value: `v = sqrt x`. - OpcodeSqrt - - // OpcodeFneg negates the given floating point value: `v = Fneg x`. - OpcodeFneg - - // OpcodeFabs takes the absolute value of the given floating point value: `v = fabs x`. - OpcodeFabs - - // OpcodeFcopysign copies the sign of the second floating point value to the first floating point value: - // `v = Fcopysign x, y`. - OpcodeFcopysign - - // OpcodeFmin takes the minimum of two floating point values: `v = fmin x, y`. - OpcodeFmin - - // OpcodeFmax takes the maximum of two floating point values: `v = fmax x, y`. - OpcodeFmax - - // OpcodeCeil takes the ceiling of the given floating point value: `v = ceil x`. - OpcodeCeil - - // OpcodeFloor takes the floor of the given floating point value: `v = floor x`. - OpcodeFloor - - // OpcodeTrunc takes the truncation of the given floating point value: `v = trunc x`. - OpcodeTrunc - - // OpcodeNearest takes the nearest integer of the given floating point value: `v = nearest x`. - OpcodeNearest - - // OpcodeBitcast is a bitcast operation: `v = bitcast x`. - OpcodeBitcast - - // OpcodeIreduce narrow the given integer: `v = Ireduce x`. - OpcodeIreduce - - // OpcodeSnarrow converts two input vectors x, y into a smaller lane vector by narrowing each lane, signed `v = Snarrow.lane x, y`. - OpcodeSnarrow - - // OpcodeUnarrow converts two input vectors x, y into a smaller lane vector by narrowing each lane, unsigned `v = Unarrow.lane x, y`. - OpcodeUnarrow - - // OpcodeSwidenLow converts low half of the smaller lane vector to a larger lane vector, sign extended: `v = SwidenLow.lane x`. - OpcodeSwidenLow - - // OpcodeSwidenHigh converts high half of the smaller lane vector to a larger lane vector, sign extended: `v = SwidenHigh.lane x`. - OpcodeSwidenHigh - - // OpcodeUwidenLow converts low half of the smaller lane vector to a larger lane vector, zero (unsigned) extended: `v = UwidenLow.lane x`. 
- OpcodeUwidenLow - - // OpcodeUwidenHigh converts high half of the smaller lane vector to a larger lane vector, zero (unsigned) extended: `v = UwidenHigh.lane x`. - OpcodeUwidenHigh - - // OpcodeExtIaddPairwise is a lane-wise integer extended pairwise addition producing extended results (twice wider results than the inputs): `v = extiadd_pairwise x, y` on vector. - OpcodeExtIaddPairwise - - // OpcodeWideningPairwiseDotProductS is a lane-wise widening pairwise dot product with signed saturation: `v = WideningPairwiseDotProductS x, y` on vector. - // Currently, the only lane is i16, and the result is i32. - OpcodeWideningPairwiseDotProductS - - // OpcodeUExtend zero-extends the given integer: `v = UExtend x, from->to`. - OpcodeUExtend - - // OpcodeSExtend sign-extends the given integer: `v = SExtend x, from->to`. - OpcodeSExtend - - // OpcodeFpromote promotes the given floating point value: `v = Fpromote x`. - OpcodeFpromote - - // OpcodeFvpromoteLow converts the two lower single-precision floating point lanes - // to the two double-precision lanes of the result: `v = FvpromoteLow.lane x` on vector. - OpcodeFvpromoteLow - - // OpcodeFdemote demotes the given float point value: `v = Fdemote x`. - OpcodeFdemote - - // OpcodeFvdemote converts the two double-precision floating point lanes - // to two lower single-precision lanes of the result `v = Fvdemote.lane x`. - OpcodeFvdemote - - // OpcodeFcvtToUint converts a floating point value to an unsigned integer: `v = FcvtToUint x`. - OpcodeFcvtToUint - - // OpcodeFcvtToSint converts a floating point value to a signed integer: `v = FcvtToSint x`. - OpcodeFcvtToSint - - // OpcodeFcvtToUintSat converts a floating point value to an unsigned integer: `v = FcvtToUintSat x` which saturates on overflow. - OpcodeFcvtToUintSat - - // OpcodeFcvtToSintSat converts a floating point value to a signed integer: `v = FcvtToSintSat x` which saturates on overflow. - OpcodeFcvtToSintSat - - // OpcodeFcvtFromUint converts an unsigned integer to a floating point value: `v = FcvtFromUint x`. - OpcodeFcvtFromUint - - // OpcodeFcvtFromSint converts a signed integer to a floating point value: `v = FcvtFromSint x`. - OpcodeFcvtFromSint - - // OpcodeAtomicRmw is atomic read-modify-write operation: `v = atomic_rmw op, p, offset, value`. - OpcodeAtomicRmw - - // OpcodeAtomicCas is atomic compare-and-swap operation. - OpcodeAtomicCas - - // OpcodeAtomicLoad is atomic load operation. - OpcodeAtomicLoad - - // OpcodeAtomicStore is atomic store operation. - OpcodeAtomicStore - - // OpcodeFence is a memory fence operation. - OpcodeFence - - // opcodeEnd marks the end of the opcode list. - opcodeEnd -) - -// AtomicRmwOp represents the atomic read-modify-write operation. -type AtomicRmwOp byte - -const ( - // AtomicRmwOpAdd is an atomic add operation. - AtomicRmwOpAdd AtomicRmwOp = iota - // AtomicRmwOpSub is an atomic sub operation. - AtomicRmwOpSub - // AtomicRmwOpAnd is an atomic and operation. - AtomicRmwOpAnd - // AtomicRmwOpOr is an atomic or operation. - AtomicRmwOpOr - // AtomicRmwOpXor is an atomic xor operation. - AtomicRmwOpXor - // AtomicRmwOpXchg is an atomic swap operation. - AtomicRmwOpXchg -) - -// String implements the fmt.Stringer. 
-func (op AtomicRmwOp) String() string { - switch op { - case AtomicRmwOpAdd: - return "add" - case AtomicRmwOpSub: - return "sub" - case AtomicRmwOpAnd: - return "and" - case AtomicRmwOpOr: - return "or" - case AtomicRmwOpXor: - return "xor" - case AtomicRmwOpXchg: - return "xchg" - } - panic(fmt.Sprintf("unknown AtomicRmwOp: %d", op)) -} - -// returnTypesFn provides the info to determine the type of instruction. -// t1 is the type of the first result, ts are the types of the remaining results. -type returnTypesFn func(b *builder, instr *Instruction) (t1 Type, ts []Type) - -var ( - returnTypesFnNoReturns returnTypesFn = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return typeInvalid, nil } - returnTypesFnSingle = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return instr.typ, nil } - returnTypesFnI32 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeI32, nil } - returnTypesFnF32 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeF32, nil } - returnTypesFnF64 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeF64, nil } - returnTypesFnV128 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeV128, nil } -) - -// sideEffect provides the info to determine if an instruction has side effects which -// is used to determine if it can be optimized out, interchanged with others, etc. -type sideEffect byte - -const ( - sideEffectUnknown sideEffect = iota - // sideEffectStrict represents an instruction with side effects, and should be always alive plus cannot be reordered. - sideEffectStrict - // sideEffectTraps represents an instruction that can trap, and should be always alive but can be reordered within the group. - sideEffectTraps - // sideEffectNone represents an instruction without side effects, and can be eliminated if the result is not used, plus can be reordered within the group. - sideEffectNone -) - -// instructionSideEffects provides the info to determine if an instruction has side effects. -// Instructions with side effects must not be eliminated regardless whether the result is used or not. 
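// A minimal, hypothetical sketch (maybeEliminable is not an API defined here)
// of how this classification is meant to be consumed: a dead-code style pass
// may only drop an instruction classified as sideEffectNone, while trapping
// and strict instructions must stay alive even when their results are unused.
func maybeEliminable(i *Instruction) bool {
	switch i.sideEffect() {
	case sideEffectNone:
		// Pure computation: safe to drop once its results are unused.
		return true
	case sideEffectTraps, sideEffectStrict:
		// May trap or carries ordering requirements: must be kept.
		return false
	}
	return false
}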
-var instructionSideEffects = [opcodeEnd]sideEffect{ - OpcodeUndefined: sideEffectStrict, - OpcodeJump: sideEffectStrict, - OpcodeIconst: sideEffectNone, - OpcodeCall: sideEffectStrict, - OpcodeCallIndirect: sideEffectStrict, - OpcodeIadd: sideEffectNone, - OpcodeImul: sideEffectNone, - OpcodeIsub: sideEffectNone, - OpcodeIcmp: sideEffectNone, - OpcodeExtractlane: sideEffectNone, - OpcodeInsertlane: sideEffectNone, - OpcodeBand: sideEffectNone, - OpcodeBor: sideEffectNone, - OpcodeBxor: sideEffectNone, - OpcodeRotl: sideEffectNone, - OpcodeRotr: sideEffectNone, - OpcodeFcmp: sideEffectNone, - OpcodeFadd: sideEffectNone, - OpcodeClz: sideEffectNone, - OpcodeCtz: sideEffectNone, - OpcodePopcnt: sideEffectNone, - OpcodeLoad: sideEffectNone, - OpcodeLoadSplat: sideEffectNone, - OpcodeUload8: sideEffectNone, - OpcodeUload16: sideEffectNone, - OpcodeUload32: sideEffectNone, - OpcodeSload8: sideEffectNone, - OpcodeSload16: sideEffectNone, - OpcodeSload32: sideEffectNone, - OpcodeSExtend: sideEffectNone, - OpcodeUExtend: sideEffectNone, - OpcodeSwidenLow: sideEffectNone, - OpcodeUwidenLow: sideEffectNone, - OpcodeSwidenHigh: sideEffectNone, - OpcodeUwidenHigh: sideEffectNone, - OpcodeSnarrow: sideEffectNone, - OpcodeUnarrow: sideEffectNone, - OpcodeSwizzle: sideEffectNone, - OpcodeShuffle: sideEffectNone, - OpcodeSplat: sideEffectNone, - OpcodeFsub: sideEffectNone, - OpcodeF32const: sideEffectNone, - OpcodeF64const: sideEffectNone, - OpcodeIshl: sideEffectNone, - OpcodeSshr: sideEffectNone, - OpcodeUshr: sideEffectNone, - OpcodeStore: sideEffectStrict, - OpcodeIstore8: sideEffectStrict, - OpcodeIstore16: sideEffectStrict, - OpcodeIstore32: sideEffectStrict, - OpcodeExitWithCode: sideEffectStrict, - OpcodeExitIfTrueWithCode: sideEffectStrict, - OpcodeReturn: sideEffectStrict, - OpcodeBrz: sideEffectStrict, - OpcodeBrnz: sideEffectStrict, - OpcodeBrTable: sideEffectStrict, - OpcodeFdiv: sideEffectNone, - OpcodeFmul: sideEffectNone, - OpcodeFmax: sideEffectNone, - OpcodeSqmulRoundSat: sideEffectNone, - OpcodeSelect: sideEffectNone, - OpcodeFmin: sideEffectNone, - OpcodeFneg: sideEffectNone, - OpcodeFcvtToSint: sideEffectTraps, - OpcodeFcvtToUint: sideEffectTraps, - OpcodeFcvtFromSint: sideEffectNone, - OpcodeFcvtFromUint: sideEffectNone, - OpcodeFcvtToSintSat: sideEffectNone, - OpcodeFcvtToUintSat: sideEffectNone, - OpcodeVFcvtFromUint: sideEffectNone, - OpcodeVFcvtFromSint: sideEffectNone, - OpcodeFdemote: sideEffectNone, - OpcodeFvpromoteLow: sideEffectNone, - OpcodeFvdemote: sideEffectNone, - OpcodeFpromote: sideEffectNone, - OpcodeBitcast: sideEffectNone, - OpcodeIreduce: sideEffectNone, - OpcodeSqrt: sideEffectNone, - OpcodeCeil: sideEffectNone, - OpcodeFloor: sideEffectNone, - OpcodeTrunc: sideEffectNone, - OpcodeNearest: sideEffectNone, - OpcodeSdiv: sideEffectTraps, - OpcodeSrem: sideEffectTraps, - OpcodeUdiv: sideEffectTraps, - OpcodeUrem: sideEffectTraps, - OpcodeFabs: sideEffectNone, - OpcodeFcopysign: sideEffectNone, - OpcodeExtIaddPairwise: sideEffectNone, - OpcodeVconst: sideEffectNone, - OpcodeVbor: sideEffectNone, - OpcodeVbxor: sideEffectNone, - OpcodeVband: sideEffectNone, - OpcodeVbandnot: sideEffectNone, - OpcodeVbnot: sideEffectNone, - OpcodeVbitselect: sideEffectNone, - OpcodeVanyTrue: sideEffectNone, - OpcodeVallTrue: sideEffectNone, - OpcodeVhighBits: sideEffectNone, - OpcodeVIadd: sideEffectNone, - OpcodeVSaddSat: sideEffectNone, - OpcodeVUaddSat: sideEffectNone, - OpcodeVIsub: sideEffectNone, - OpcodeVSsubSat: sideEffectNone, - OpcodeVUsubSat: sideEffectNone, - OpcodeVIcmp: 
sideEffectNone, - OpcodeVImin: sideEffectNone, - OpcodeVUmin: sideEffectNone, - OpcodeVImax: sideEffectNone, - OpcodeVUmax: sideEffectNone, - OpcodeVAvgRound: sideEffectNone, - OpcodeVImul: sideEffectNone, - OpcodeVIabs: sideEffectNone, - OpcodeVIneg: sideEffectNone, - OpcodeVIpopcnt: sideEffectNone, - OpcodeVIshl: sideEffectNone, - OpcodeVSshr: sideEffectNone, - OpcodeVUshr: sideEffectNone, - OpcodeVSqrt: sideEffectNone, - OpcodeVFabs: sideEffectNone, - OpcodeVFmin: sideEffectNone, - OpcodeVFmax: sideEffectNone, - OpcodeVFneg: sideEffectNone, - OpcodeVFadd: sideEffectNone, - OpcodeVFsub: sideEffectNone, - OpcodeVFmul: sideEffectNone, - OpcodeVFdiv: sideEffectNone, - OpcodeVFcmp: sideEffectNone, - OpcodeVCeil: sideEffectNone, - OpcodeVFloor: sideEffectNone, - OpcodeVTrunc: sideEffectNone, - OpcodeVNearest: sideEffectNone, - OpcodeVMaxPseudo: sideEffectNone, - OpcodeVMinPseudo: sideEffectNone, - OpcodeVFcvtToUintSat: sideEffectNone, - OpcodeVFcvtToSintSat: sideEffectNone, - OpcodeVZeroExtLoad: sideEffectNone, - OpcodeAtomicRmw: sideEffectStrict, - OpcodeAtomicLoad: sideEffectStrict, - OpcodeAtomicStore: sideEffectStrict, - OpcodeAtomicCas: sideEffectStrict, - OpcodeFence: sideEffectStrict, - OpcodeWideningPairwiseDotProductS: sideEffectNone, -} - -// sideEffect returns true if this instruction has side effects. -func (i *Instruction) sideEffect() sideEffect { - if e := instructionSideEffects[i.opcode]; e == sideEffectUnknown { - panic("BUG: side effect info not registered for " + i.opcode.String()) - } else { - return e - } -} - -// instructionReturnTypes provides the function to determine the return types of an instruction. -var instructionReturnTypes = [opcodeEnd]returnTypesFn{ - OpcodeExtIaddPairwise: returnTypesFnV128, - OpcodeVbor: returnTypesFnV128, - OpcodeVbxor: returnTypesFnV128, - OpcodeVband: returnTypesFnV128, - OpcodeVbnot: returnTypesFnV128, - OpcodeVbandnot: returnTypesFnV128, - OpcodeVbitselect: returnTypesFnV128, - OpcodeVanyTrue: returnTypesFnI32, - OpcodeVallTrue: returnTypesFnI32, - OpcodeVhighBits: returnTypesFnI32, - OpcodeVIadd: returnTypesFnV128, - OpcodeVSaddSat: returnTypesFnV128, - OpcodeVUaddSat: returnTypesFnV128, - OpcodeVIsub: returnTypesFnV128, - OpcodeVSsubSat: returnTypesFnV128, - OpcodeVUsubSat: returnTypesFnV128, - OpcodeVIcmp: returnTypesFnV128, - OpcodeVImin: returnTypesFnV128, - OpcodeVUmin: returnTypesFnV128, - OpcodeVImax: returnTypesFnV128, - OpcodeVUmax: returnTypesFnV128, - OpcodeVImul: returnTypesFnV128, - OpcodeVAvgRound: returnTypesFnV128, - OpcodeVIabs: returnTypesFnV128, - OpcodeVIneg: returnTypesFnV128, - OpcodeVIpopcnt: returnTypesFnV128, - OpcodeVIshl: returnTypesFnV128, - OpcodeVSshr: returnTypesFnV128, - OpcodeVUshr: returnTypesFnV128, - OpcodeExtractlane: returnTypesFnSingle, - OpcodeInsertlane: returnTypesFnV128, - OpcodeBand: returnTypesFnSingle, - OpcodeFcopysign: returnTypesFnSingle, - OpcodeBitcast: returnTypesFnSingle, - OpcodeBor: returnTypesFnSingle, - OpcodeBxor: returnTypesFnSingle, - OpcodeRotl: returnTypesFnSingle, - OpcodeRotr: returnTypesFnSingle, - OpcodeIshl: returnTypesFnSingle, - OpcodeSshr: returnTypesFnSingle, - OpcodeSdiv: returnTypesFnSingle, - OpcodeSrem: returnTypesFnSingle, - OpcodeUdiv: returnTypesFnSingle, - OpcodeUrem: returnTypesFnSingle, - OpcodeUshr: returnTypesFnSingle, - OpcodeJump: returnTypesFnNoReturns, - OpcodeUndefined: returnTypesFnNoReturns, - OpcodeIconst: returnTypesFnSingle, - OpcodeSelect: returnTypesFnSingle, - OpcodeSExtend: returnTypesFnSingle, - OpcodeUExtend: returnTypesFnSingle, - 
OpcodeSwidenLow: returnTypesFnV128, - OpcodeUwidenLow: returnTypesFnV128, - OpcodeSwidenHigh: returnTypesFnV128, - OpcodeUwidenHigh: returnTypesFnV128, - OpcodeSnarrow: returnTypesFnV128, - OpcodeUnarrow: returnTypesFnV128, - OpcodeSwizzle: returnTypesFnSingle, - OpcodeShuffle: returnTypesFnV128, - OpcodeSplat: returnTypesFnV128, - OpcodeIreduce: returnTypesFnSingle, - OpcodeFabs: returnTypesFnSingle, - OpcodeSqrt: returnTypesFnSingle, - OpcodeCeil: returnTypesFnSingle, - OpcodeFloor: returnTypesFnSingle, - OpcodeTrunc: returnTypesFnSingle, - OpcodeNearest: returnTypesFnSingle, - OpcodeCallIndirect: func(b *builder, instr *Instruction) (t1 Type, ts []Type) { - sigID := SignatureID(instr.u1) - sig, ok := b.signatures[sigID] - if !ok { - panic("BUG") - } - switch len(sig.Results) { - case 0: - t1 = typeInvalid - case 1: - t1 = sig.Results[0] - default: - t1, ts = sig.Results[0], sig.Results[1:] - } - return - }, - OpcodeCall: func(b *builder, instr *Instruction) (t1 Type, ts []Type) { - sigID := SignatureID(instr.u2) - sig, ok := b.signatures[sigID] - if !ok { - panic("BUG") - } - switch len(sig.Results) { - case 0: - t1 = typeInvalid - case 1: - t1 = sig.Results[0] - default: - t1, ts = sig.Results[0], sig.Results[1:] - } - return - }, - OpcodeLoad: returnTypesFnSingle, - OpcodeVZeroExtLoad: returnTypesFnV128, - OpcodeLoadSplat: returnTypesFnV128, - OpcodeIadd: returnTypesFnSingle, - OpcodeIsub: returnTypesFnSingle, - OpcodeImul: returnTypesFnSingle, - OpcodeIcmp: returnTypesFnI32, - OpcodeFcmp: returnTypesFnI32, - OpcodeFadd: returnTypesFnSingle, - OpcodeFsub: returnTypesFnSingle, - OpcodeFdiv: returnTypesFnSingle, - OpcodeFmul: returnTypesFnSingle, - OpcodeFmax: returnTypesFnSingle, - OpcodeFmin: returnTypesFnSingle, - OpcodeSqmulRoundSat: returnTypesFnV128, - OpcodeF32const: returnTypesFnF32, - OpcodeF64const: returnTypesFnF64, - OpcodeClz: returnTypesFnSingle, - OpcodeCtz: returnTypesFnSingle, - OpcodePopcnt: returnTypesFnSingle, - OpcodeStore: returnTypesFnNoReturns, - OpcodeIstore8: returnTypesFnNoReturns, - OpcodeIstore16: returnTypesFnNoReturns, - OpcodeIstore32: returnTypesFnNoReturns, - OpcodeExitWithCode: returnTypesFnNoReturns, - OpcodeExitIfTrueWithCode: returnTypesFnNoReturns, - OpcodeReturn: returnTypesFnNoReturns, - OpcodeBrz: returnTypesFnNoReturns, - OpcodeBrnz: returnTypesFnNoReturns, - OpcodeBrTable: returnTypesFnNoReturns, - OpcodeUload8: returnTypesFnSingle, - OpcodeUload16: returnTypesFnSingle, - OpcodeUload32: returnTypesFnSingle, - OpcodeSload8: returnTypesFnSingle, - OpcodeSload16: returnTypesFnSingle, - OpcodeSload32: returnTypesFnSingle, - OpcodeFcvtToSint: returnTypesFnSingle, - OpcodeFcvtToUint: returnTypesFnSingle, - OpcodeFcvtFromSint: returnTypesFnSingle, - OpcodeFcvtFromUint: returnTypesFnSingle, - OpcodeFcvtToSintSat: returnTypesFnSingle, - OpcodeFcvtToUintSat: returnTypesFnSingle, - OpcodeVFcvtFromUint: returnTypesFnV128, - OpcodeVFcvtFromSint: returnTypesFnV128, - OpcodeFneg: returnTypesFnSingle, - OpcodeFdemote: returnTypesFnF32, - OpcodeFvdemote: returnTypesFnV128, - OpcodeFvpromoteLow: returnTypesFnV128, - OpcodeFpromote: returnTypesFnF64, - OpcodeVconst: returnTypesFnV128, - OpcodeVFabs: returnTypesFnV128, - OpcodeVSqrt: returnTypesFnV128, - OpcodeVFmax: returnTypesFnV128, - OpcodeVFmin: returnTypesFnV128, - OpcodeVFneg: returnTypesFnV128, - OpcodeVFadd: returnTypesFnV128, - OpcodeVFsub: returnTypesFnV128, - OpcodeVFmul: returnTypesFnV128, - OpcodeVFdiv: returnTypesFnV128, - OpcodeVFcmp: returnTypesFnV128, - OpcodeVCeil: returnTypesFnV128, - 
OpcodeVFloor: returnTypesFnV128,
- OpcodeVTrunc: returnTypesFnV128,
- OpcodeVNearest: returnTypesFnV128,
- OpcodeVMaxPseudo: returnTypesFnV128,
- OpcodeVMinPseudo: returnTypesFnV128,
- OpcodeVFcvtToUintSat: returnTypesFnV128,
- OpcodeVFcvtToSintSat: returnTypesFnV128,
- OpcodeAtomicRmw: returnTypesFnSingle,
- OpcodeAtomicLoad: returnTypesFnSingle,
- OpcodeAtomicStore: returnTypesFnNoReturns,
- OpcodeAtomicCas: returnTypesFnSingle,
- OpcodeFence: returnTypesFnNoReturns,
- OpcodeWideningPairwiseDotProductS: returnTypesFnV128,
-}
-
-// AsLoad initializes this instruction as a load instruction with OpcodeLoad.
-func (i *Instruction) AsLoad(ptr Value, offset uint32, typ Type) *Instruction {
- i.opcode = OpcodeLoad
- i.v = ptr
- i.u1 = uint64(offset)
- i.typ = typ
- return i
-}
-
-// AsExtLoad initializes this instruction as an extending load instruction with the given load opcode such as OpcodeUload8 or OpcodeSload32; dst64bit selects a 64-bit or 32-bit result.
-func (i *Instruction) AsExtLoad(op Opcode, ptr Value, offset uint32, dst64bit bool) *Instruction {
- i.opcode = op
- i.v = ptr
- i.u1 = uint64(offset)
- if dst64bit {
- i.typ = TypeI64
- } else {
- i.typ = TypeI32
- }
- return i
-}
-
-// AsVZeroExtLoad initializes this instruction as a zero-extending scalar load instruction with OpcodeVZeroExtLoad.
-func (i *Instruction) AsVZeroExtLoad(ptr Value, offset uint32, scalarType Type) *Instruction {
- i.opcode = OpcodeVZeroExtLoad
- i.v = ptr
- i.u1 = uint64(offset)
- i.u2 = uint64(scalarType)
- i.typ = TypeV128
- return i
-}
-
-// VZeroExtLoadData returns the operands for a load instruction. The returned `typ` is the scalar type of the load target.
-func (i *Instruction) VZeroExtLoadData() (ptr Value, offset uint32, typ Type) {
- return i.v, uint32(i.u1), Type(i.u2)
-}
-
-// AsLoadSplat initializes this instruction as a load-and-splat instruction with OpcodeLoadSplat.
-func (i *Instruction) AsLoadSplat(ptr Value, offset uint32, lane VecLane) *Instruction {
- i.opcode = OpcodeLoadSplat
- i.v = ptr
- i.u1 = uint64(offset)
- i.u2 = uint64(lane)
- i.typ = TypeV128
- return i
-}
-
-// LoadData returns the operands for a load instruction.
-func (i *Instruction) LoadData() (ptr Value, offset uint32, typ Type) {
- return i.v, uint32(i.u1), i.typ
-}
-
-// LoadSplatData returns the operands for a load splat instruction.
-func (i *Instruction) LoadSplatData() (ptr Value, offset uint32, lane VecLane) {
- return i.v, uint32(i.u1), VecLane(i.u2)
-}
-
-// AsStore initializes this instruction as a store instruction with the given store opcode (OpcodeStore, OpcodeIstore8, OpcodeIstore16, or OpcodeIstore32).
-func (i *Instruction) AsStore(storeOp Opcode, value, ptr Value, offset uint32) *Instruction {
- i.opcode = storeOp
- i.v = value
- i.v2 = ptr
-
- var dstSize uint64
- switch storeOp {
- case OpcodeStore:
- dstSize = uint64(value.Type().Bits())
- case OpcodeIstore8:
- dstSize = 8
- case OpcodeIstore16:
- dstSize = 16
- case OpcodeIstore32:
- dstSize = 32
- default:
- panic("invalid store opcode: " + storeOp.String())
- }
- i.u1 = uint64(offset) | dstSize<<32
- return i
-}
-
-// StoreData returns the operands for a store instruction.
-func (i *Instruction) StoreData() (value, ptr Value, offset uint32, storeSizeInBits byte) {
- return i.v, i.v2, uint32(i.u1), byte(i.u1 >> 32)
-}
-
-// AsIconst64 initializes this instruction as a 64-bit integer constant instruction with OpcodeIconst.
-func (i *Instruction) AsIconst64(v uint64) *Instruction {
- i.opcode = OpcodeIconst
- i.typ = TypeI64
- i.u1 = v
- return i
-}
-
-// AsIconst32 initializes this instruction as a 32-bit integer constant instruction with OpcodeIconst.
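// A hypothetical helper (storeRoundTrip is not an API defined here) sketching
// the u1 packing performed by AsStore above: the low 32 bits hold the offset
// and the high 32 bits the store width in bits, as recovered by StoreData.
// `value` and `ptr` are assumed to be valid Values.
func storeRoundTrip(value, ptr Value) {
	i := &Instruction{}
	i.AsStore(OpcodeIstore16, value, ptr, 0x20)

	v, p, offset, bits := i.StoreData()
	// Here offset == 0x20 and bits == 16, both unpacked from i.u1.
	fmt.Printf("value=%v ptr=%v offset=%#x storeSizeInBits=%d\n", v, p, offset, bits)
}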
-func (i *Instruction) AsIconst32(v uint32) *Instruction { - i.opcode = OpcodeIconst - i.typ = TypeI32 - i.u1 = uint64(v) - return i -} - -// AsIadd initializes this instruction as an integer addition instruction with OpcodeIadd. -func (i *Instruction) AsIadd(x, y Value) *Instruction { - i.opcode = OpcodeIadd - i.v = x - i.v2 = y - i.typ = x.Type() - return i -} - -// AsVIadd initializes this instruction as an integer addition instruction with OpcodeVIadd on a vector. -func (i *Instruction) AsVIadd(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVIadd - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsWideningPairwiseDotProductS initializes this instruction as a lane-wise integer extended pairwise addition instruction -// with OpcodeIaddPairwise on a vector. -func (i *Instruction) AsWideningPairwiseDotProductS(x, y Value) *Instruction { - i.opcode = OpcodeWideningPairwiseDotProductS - i.v = x - i.v2 = y - i.typ = TypeV128 - return i -} - -// AsExtIaddPairwise initializes this instruction as a lane-wise integer extended pairwise addition instruction -// with OpcodeIaddPairwise on a vector. -func (i *Instruction) AsExtIaddPairwise(x Value, srcLane VecLane, signed bool) *Instruction { - i.opcode = OpcodeExtIaddPairwise - i.v = x - i.u1 = uint64(srcLane) - if signed { - i.u2 = 1 - } - i.typ = TypeV128 - return i -} - -// ExtIaddPairwiseData returns the operands for a lane-wise integer extended pairwise addition instruction. -func (i *Instruction) ExtIaddPairwiseData() (x Value, srcLane VecLane, signed bool) { - return i.v, VecLane(i.u1), i.u2 != 0 -} - -// AsVSaddSat initializes this instruction as a vector addition with saturation instruction with OpcodeVSaddSat on a vector. -func (i *Instruction) AsVSaddSat(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVSaddSat - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVUaddSat initializes this instruction as a vector addition with saturation instruction with OpcodeVUaddSat on a vector. -func (i *Instruction) AsVUaddSat(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVUaddSat - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVIsub initializes this instruction as an integer subtraction instruction with OpcodeVIsub on a vector. -func (i *Instruction) AsVIsub(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVIsub - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVSsubSat initializes this instruction as a vector addition with saturation instruction with OpcodeVSsubSat on a vector. -func (i *Instruction) AsVSsubSat(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVSsubSat - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVUsubSat initializes this instruction as a vector addition with saturation instruction with OpcodeVUsubSat on a vector. -func (i *Instruction) AsVUsubSat(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVUsubSat - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVImin initializes this instruction as a signed integer min instruction with OpcodeVImin on a vector. -func (i *Instruction) AsVImin(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVImin - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVUmin initializes this instruction as an unsigned integer min instruction with OpcodeVUmin on a vector. 
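// A hypothetical sketch (extIaddPairwiseExample is not an API defined here) of
// the encoding used by AsExtIaddPairwise above: the source lane is stored in
// u1 and the signedness flag in u2, which ExtIaddPairwiseData decodes.
// `x` is assumed to be a V128-typed Value.
func extIaddPairwiseExample(x Value) {
	i := &Instruction{}
	i.AsExtIaddPairwise(x, VecLaneI8x16, true)

	v, srcLane, signed := i.ExtIaddPairwiseData()
	// Here srcLane == VecLaneI8x16 and signed == true.
	fmt.Println(v, srcLane, signed)
}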
-func (i *Instruction) AsVUmin(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVUmin - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVImax initializes this instruction as a signed integer max instruction with OpcodeVImax on a vector. -func (i *Instruction) AsVImax(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVImax - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVUmax initializes this instruction as an unsigned integer max instruction with OpcodeVUmax on a vector. -func (i *Instruction) AsVUmax(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVUmax - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVAvgRound initializes this instruction as an unsigned integer avg instruction, truncating to zero with OpcodeVAvgRound on a vector. -func (i *Instruction) AsVAvgRound(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVAvgRound - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVImul initializes this instruction as an integer multiplication with OpcodeVImul on a vector. -func (i *Instruction) AsVImul(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVImul - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsSqmulRoundSat initializes this instruction as a lane-wise saturating rounding multiplication -// in Q15 format with OpcodeSqmulRoundSat on a vector. -func (i *Instruction) AsSqmulRoundSat(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeSqmulRoundSat - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVIabs initializes this instruction as a vector absolute value with OpcodeVIabs. -func (i *Instruction) AsVIabs(x Value, lane VecLane) *Instruction { - i.opcode = OpcodeVIabs - i.v = x - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVIneg initializes this instruction as a vector negation with OpcodeVIneg. -func (i *Instruction) AsVIneg(x Value, lane VecLane) *Instruction { - i.opcode = OpcodeVIneg - i.v = x - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVIpopcnt initializes this instruction as a Population Count instruction with OpcodeVIpopcnt on a vector. -func (i *Instruction) AsVIpopcnt(x Value, lane VecLane) *Instruction { - if lane != VecLaneI8x16 { - panic("Unsupported lane type " + lane.String()) - } - i.opcode = OpcodeVIpopcnt - i.v = x - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVSqrt initializes this instruction as a sqrt instruction with OpcodeVSqrt on a vector. -func (i *Instruction) AsVSqrt(x Value, lane VecLane) *Instruction { - i.opcode = OpcodeVSqrt - i.v = x - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVFabs initializes this instruction as a float abs instruction with OpcodeVFabs on a vector. -func (i *Instruction) AsVFabs(x Value, lane VecLane) *Instruction { - i.opcode = OpcodeVFabs - i.v = x - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVFneg initializes this instruction as a float neg instruction with OpcodeVFneg on a vector. -func (i *Instruction) AsVFneg(x Value, lane VecLane) *Instruction { - i.opcode = OpcodeVFneg - i.v = x - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVFmax initializes this instruction as a float max instruction with OpcodeVFmax on a vector. 
-func (i *Instruction) AsVFmax(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVFmax - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVFmin initializes this instruction as a float min instruction with OpcodeVFmin on a vector. -func (i *Instruction) AsVFmin(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVFmin - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVFadd initializes this instruction as a floating point add instruction with OpcodeVFadd on a vector. -func (i *Instruction) AsVFadd(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVFadd - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVFsub initializes this instruction as a floating point subtraction instruction with OpcodeVFsub on a vector. -func (i *Instruction) AsVFsub(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVFsub - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVFmul initializes this instruction as a floating point multiplication instruction with OpcodeVFmul on a vector. -func (i *Instruction) AsVFmul(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVFmul - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVFdiv initializes this instruction as a floating point division instruction with OpcodeVFdiv on a vector. -func (i *Instruction) AsVFdiv(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeVFdiv - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsImul initializes this instruction as an integer addition instruction with OpcodeImul. -func (i *Instruction) AsImul(x, y Value) *Instruction { - i.opcode = OpcodeImul - i.v = x - i.v2 = y - i.typ = x.Type() - return i -} - -func (i *Instruction) Insert(b Builder) *Instruction { - b.InsertInstruction(i) - return i -} - -// AsIsub initializes this instruction as an integer subtraction instruction with OpcodeIsub. -func (i *Instruction) AsIsub(x, y Value) *Instruction { - i.opcode = OpcodeIsub - i.v = x - i.v2 = y - i.typ = x.Type() - return i -} - -// AsIcmp initializes this instruction as an integer comparison instruction with OpcodeIcmp. -func (i *Instruction) AsIcmp(x, y Value, c IntegerCmpCond) *Instruction { - i.opcode = OpcodeIcmp - i.v = x - i.v2 = y - i.u1 = uint64(c) - i.typ = TypeI32 - return i -} - -// AsFcmp initializes this instruction as an integer comparison instruction with OpcodeFcmp. -func (i *Instruction) AsFcmp(x, y Value, c FloatCmpCond) { - i.opcode = OpcodeFcmp - i.v = x - i.v2 = y - i.u1 = uint64(c) - i.typ = TypeI32 -} - -// AsVIcmp initializes this instruction as an integer vector comparison instruction with OpcodeVIcmp. -func (i *Instruction) AsVIcmp(x, y Value, c IntegerCmpCond, lane VecLane) *Instruction { - i.opcode = OpcodeVIcmp - i.v = x - i.v2 = y - i.u1 = uint64(c) - i.u2 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsVFcmp initializes this instruction as a float comparison instruction with OpcodeVFcmp on Vector. -func (i *Instruction) AsVFcmp(x, y Value, c FloatCmpCond, lane VecLane) *Instruction { - i.opcode = OpcodeVFcmp - i.v = x - i.v2 = y - i.u1 = uint64(c) - i.typ = TypeV128 - i.u2 = uint64(lane) - return i -} - -// AsVCeil initializes this instruction as an instruction with OpcodeCeil. 
-func (i *Instruction) AsVCeil(x Value, lane VecLane) *Instruction {
- i.opcode = OpcodeVCeil
- i.v = x
- i.typ = x.Type()
- i.u1 = uint64(lane)
- return i
-}
-
-// AsVFloor initializes this instruction as an instruction with OpcodeVFloor.
-func (i *Instruction) AsVFloor(x Value, lane VecLane) *Instruction {
- i.opcode = OpcodeVFloor
- i.v = x
- i.typ = x.Type()
- i.u1 = uint64(lane)
- return i
-}
-
-// AsVTrunc initializes this instruction as an instruction with OpcodeVTrunc.
-func (i *Instruction) AsVTrunc(x Value, lane VecLane) *Instruction {
- i.opcode = OpcodeVTrunc
- i.v = x
- i.typ = x.Type()
- i.u1 = uint64(lane)
- return i
-}
-
-// AsVNearest initializes this instruction as an instruction with OpcodeVNearest.
-func (i *Instruction) AsVNearest(x Value, lane VecLane) *Instruction {
- i.opcode = OpcodeVNearest
- i.v = x
- i.typ = x.Type()
- i.u1 = uint64(lane)
- return i
-}
-
-// AsVMaxPseudo initializes this instruction as an instruction with OpcodeVMaxPseudo.
-func (i *Instruction) AsVMaxPseudo(x, y Value, lane VecLane) *Instruction {
- i.opcode = OpcodeVMaxPseudo
- i.typ = x.Type()
- i.v = x
- i.v2 = y
- i.u1 = uint64(lane)
- return i
-}
-
-// AsVMinPseudo initializes this instruction as an instruction with OpcodeVMinPseudo.
-func (i *Instruction) AsVMinPseudo(x, y Value, lane VecLane) *Instruction {
- i.opcode = OpcodeVMinPseudo
- i.typ = x.Type()
- i.v = x
- i.v2 = y
- i.u1 = uint64(lane)
- return i
-}
-
-// AsSDiv initializes this instruction as a signed integer division instruction with OpcodeSdiv.
-func (i *Instruction) AsSDiv(x, y, ctx Value) *Instruction {
- i.opcode = OpcodeSdiv
- i.v = x
- i.v2 = y
- i.v3 = ctx
- i.typ = x.Type()
- return i
-}
-
-// AsUDiv initializes this instruction as an unsigned integer division instruction with OpcodeUdiv.
-func (i *Instruction) AsUDiv(x, y, ctx Value) *Instruction {
- i.opcode = OpcodeUdiv
- i.v = x
- i.v2 = y
- i.v3 = ctx
- i.typ = x.Type()
- return i
-}
-
-// AsSRem initializes this instruction as a signed integer remainder instruction with OpcodeSrem.
-func (i *Instruction) AsSRem(x, y, ctx Value) *Instruction {
- i.opcode = OpcodeSrem
- i.v = x
- i.v2 = y
- i.v3 = ctx
- i.typ = x.Type()
- return i
-}
-
-// AsURem initializes this instruction as an unsigned integer remainder instruction with OpcodeUrem.
-func (i *Instruction) AsURem(x, y, ctx Value) *Instruction {
- i.opcode = OpcodeUrem
- i.v = x
- i.v2 = y
- i.v3 = ctx
- i.typ = x.Type()
- return i
-}
-
-// AsBand initializes this instruction as an integer bitwise and instruction with OpcodeBand.
-func (i *Instruction) AsBand(x, amount Value) *Instruction {
- i.opcode = OpcodeBand
- i.v = x
- i.v2 = amount
- i.typ = x.Type()
- return i
-}
-
-// AsBor initializes this instruction as an integer bitwise or instruction with OpcodeBor.
-func (i *Instruction) AsBor(x, amount Value) {
- i.opcode = OpcodeBor
- i.v = x
- i.v2 = amount
- i.typ = x.Type()
-}
-
-// AsBxor initializes this instruction as an integer bitwise xor instruction with OpcodeBxor.
-func (i *Instruction) AsBxor(x, amount Value) {
- i.opcode = OpcodeBxor
- i.v = x
- i.v2 = amount
- i.typ = x.Type()
-}
-
-// AsIshl initializes this instruction as an integer shift left instruction with OpcodeIshl.
-func (i *Instruction) AsIshl(x, amount Value) *Instruction {
- i.opcode = OpcodeIshl
- i.v = x
- i.v2 = amount
- i.typ = x.Type()
- return i
-}
-
-// AsVIshl initializes this instruction as an integer shift left instruction with OpcodeVIshl on vector.
-func (i *Instruction) AsVIshl(x, amount Value, lane VecLane) *Instruction { - i.opcode = OpcodeVIshl - i.v = x - i.v2 = amount - i.u1 = uint64(lane) - i.typ = x.Type() - return i -} - -// AsUshr initializes this instruction as an integer unsigned shift right (logical shift right) instruction with OpcodeUshr. -func (i *Instruction) AsUshr(x, amount Value) *Instruction { - i.opcode = OpcodeUshr - i.v = x - i.v2 = amount - i.typ = x.Type() - return i -} - -// AsVUshr initializes this instruction as an integer unsigned shift right (logical shift right) instruction with OpcodeVUshr on vector. -func (i *Instruction) AsVUshr(x, amount Value, lane VecLane) *Instruction { - i.opcode = OpcodeVUshr - i.v = x - i.v2 = amount - i.u1 = uint64(lane) - i.typ = x.Type() - return i -} - -// AsSshr initializes this instruction as an integer signed shift right (arithmetic shift right) instruction with OpcodeSshr. -func (i *Instruction) AsSshr(x, amount Value) *Instruction { - i.opcode = OpcodeSshr - i.v = x - i.v2 = amount - i.typ = x.Type() - return i -} - -// AsVSshr initializes this instruction as an integer signed shift right (arithmetic shift right) instruction with OpcodeVSshr on vector. -func (i *Instruction) AsVSshr(x, amount Value, lane VecLane) *Instruction { - i.opcode = OpcodeVSshr - i.v = x - i.v2 = amount - i.u1 = uint64(lane) - i.typ = x.Type() - return i -} - -// AsExtractlane initializes this instruction as an extract lane instruction with OpcodeExtractlane on vector. -func (i *Instruction) AsExtractlane(x Value, index byte, lane VecLane, signed bool) *Instruction { - i.opcode = OpcodeExtractlane - i.v = x - // We do not have a field for signedness, but `index` is a byte, - // so we just encode the flag in the high bits of `u1`. - i.u1 = uint64(index) - if signed { - i.u1 = i.u1 | 1<<32 - } - i.u2 = uint64(lane) - switch lane { - case VecLaneI8x16, VecLaneI16x8, VecLaneI32x4: - i.typ = TypeI32 - case VecLaneI64x2: - i.typ = TypeI64 - case VecLaneF32x4: - i.typ = TypeF32 - case VecLaneF64x2: - i.typ = TypeF64 - } - return i -} - -// AsInsertlane initializes this instruction as an insert lane instruction with OpcodeInsertlane on vector. -func (i *Instruction) AsInsertlane(x, y Value, index byte, lane VecLane) *Instruction { - i.opcode = OpcodeInsertlane - i.v = x - i.v2 = y - i.u1 = uint64(index) - i.u2 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsShuffle initializes this instruction as a shuffle instruction with OpcodeShuffle on vector. -func (i *Instruction) AsShuffle(x, y Value, lane []byte) *Instruction { - i.opcode = OpcodeShuffle - i.v = x - i.v2 = y - // Encode the 16 bytes as 8 bytes in u1, and 8 bytes in u2. - i.u1 = uint64(lane[7])<<56 | uint64(lane[6])<<48 | uint64(lane[5])<<40 | uint64(lane[4])<<32 | uint64(lane[3])<<24 | uint64(lane[2])<<16 | uint64(lane[1])<<8 | uint64(lane[0]) - i.u2 = uint64(lane[15])<<56 | uint64(lane[14])<<48 | uint64(lane[13])<<40 | uint64(lane[12])<<32 | uint64(lane[11])<<24 | uint64(lane[10])<<16 | uint64(lane[9])<<8 | uint64(lane[8]) - i.typ = TypeV128 - return i -} - -// AsSwizzle initializes this instruction as an insert lane instruction with OpcodeSwizzle on vector. -func (i *Instruction) AsSwizzle(x, y Value, lane VecLane) *Instruction { - i.opcode = OpcodeSwizzle - i.v = x - i.v2 = y - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsSplat initializes this instruction as an insert lane instruction with OpcodeSplat on vector. 
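// A hypothetical helper (shuffleLanes is not an API defined here) showing how
// the 16 lane indices packed by AsShuffle above can be unpacked again via
// ShuffleData: lanes 0-7 sit in the low-to-high bytes of lo, lanes 8-15 in hi.
func shuffleLanes(i *Instruction) (lanes [16]byte) {
	_, _, lo, hi := i.ShuffleData()
	for b := 0; b < 8; b++ {
		lanes[b] = byte(lo >> (8 * b))   // byte b of lo -> lane b
		lanes[b+8] = byte(hi >> (8 * b)) // byte b of hi -> lane b+8
	}
	return lanes
}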
-func (i *Instruction) AsSplat(x Value, lane VecLane) *Instruction { - i.opcode = OpcodeSplat - i.v = x - i.u1 = uint64(lane) - i.typ = TypeV128 - return i -} - -// AsRotl initializes this instruction as a word rotate left instruction with OpcodeRotl. -func (i *Instruction) AsRotl(x, amount Value) { - i.opcode = OpcodeRotl - i.v = x - i.v2 = amount - i.typ = x.Type() -} - -// AsRotr initializes this instruction as a word rotate right instruction with OpcodeRotr. -func (i *Instruction) AsRotr(x, amount Value) { - i.opcode = OpcodeRotr - i.v = x - i.v2 = amount - i.typ = x.Type() -} - -// IcmpData returns the operands and comparison condition of this integer comparison instruction. -func (i *Instruction) IcmpData() (x, y Value, c IntegerCmpCond) { - return i.v, i.v2, IntegerCmpCond(i.u1) -} - -// FcmpData returns the operands and comparison condition of this floating-point comparison instruction. -func (i *Instruction) FcmpData() (x, y Value, c FloatCmpCond) { - return i.v, i.v2, FloatCmpCond(i.u1) -} - -// VIcmpData returns the operands and comparison condition of this integer comparison instruction on vector. -func (i *Instruction) VIcmpData() (x, y Value, c IntegerCmpCond, l VecLane) { - return i.v, i.v2, IntegerCmpCond(i.u1), VecLane(i.u2) -} - -// VFcmpData returns the operands and comparison condition of this float comparison instruction on vector. -func (i *Instruction) VFcmpData() (x, y Value, c FloatCmpCond, l VecLane) { - return i.v, i.v2, FloatCmpCond(i.u1), VecLane(i.u2) -} - -// ExtractlaneData returns the operands and sign flag of Extractlane on vector. -func (i *Instruction) ExtractlaneData() (x Value, index byte, signed bool, l VecLane) { - x = i.v - index = byte(0b00001111 & i.u1) - signed = i.u1>>32 != 0 - l = VecLane(i.u2) - return -} - -// InsertlaneData returns the operands and sign flag of Insertlane on vector. -func (i *Instruction) InsertlaneData() (x, y Value, index byte, l VecLane) { - x = i.v - y = i.v2 - index = byte(i.u1) - l = VecLane(i.u2) - return -} - -// AsFadd initializes this instruction as a floating-point addition instruction with OpcodeFadd. -func (i *Instruction) AsFadd(x, y Value) { - i.opcode = OpcodeFadd - i.v = x - i.v2 = y - i.typ = x.Type() -} - -// AsFsub initializes this instruction as a floating-point subtraction instruction with OpcodeFsub. -func (i *Instruction) AsFsub(x, y Value) { - i.opcode = OpcodeFsub - i.v = x - i.v2 = y - i.typ = x.Type() -} - -// AsFmul initializes this instruction as a floating-point multiplication instruction with OpcodeFmul. -func (i *Instruction) AsFmul(x, y Value) { - i.opcode = OpcodeFmul - i.v = x - i.v2 = y - i.typ = x.Type() -} - -// AsFdiv initializes this instruction as a floating-point division instruction with OpcodeFdiv. -func (i *Instruction) AsFdiv(x, y Value) { - i.opcode = OpcodeFdiv - i.v = x - i.v2 = y - i.typ = x.Type() -} - -// AsFmin initializes this instruction to take the minimum of two floating-points with OpcodeFmin. -func (i *Instruction) AsFmin(x, y Value) { - i.opcode = OpcodeFmin - i.v = x - i.v2 = y - i.typ = x.Type() -} - -// AsFmax initializes this instruction to take the maximum of two floating-points with OpcodeFmax. -func (i *Instruction) AsFmax(x, y Value) { - i.opcode = OpcodeFmax - i.v = x - i.v2 = y - i.typ = x.Type() -} - -// AsF32const initializes this instruction as a 32-bit floating-point constant instruction with OpcodeF32const. 
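// A hypothetical round trip (extractLaneExample is not an API defined here)
// through the u1 layout used by AsExtractlane above: the lane index lives in
// the low bits and bit 32 carries the signedness flag, as read back by
// ExtractlaneData. `x` is assumed to be a V128-typed Value.
func extractLaneExample(x Value) {
	i := &Instruction{}
	i.AsExtractlane(x, 3, VecLaneI16x8, true)

	v, index, signed, lane := i.ExtractlaneData()
	// Here index == 3, signed == true, and lane == VecLaneI16x8.
	fmt.Println(v, index, signed, lane)
}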
-func (i *Instruction) AsF32const(f float32) *Instruction { - i.opcode = OpcodeF32const - i.typ = TypeF64 - i.u1 = uint64(math.Float32bits(f)) - return i -} - -// AsF64const initializes this instruction as a 64-bit floating-point constant instruction with OpcodeF64const. -func (i *Instruction) AsF64const(f float64) *Instruction { - i.opcode = OpcodeF64const - i.typ = TypeF64 - i.u1 = math.Float64bits(f) - return i -} - -// AsVconst initializes this instruction as a vector constant instruction with OpcodeVconst. -func (i *Instruction) AsVconst(lo, hi uint64) *Instruction { - i.opcode = OpcodeVconst - i.typ = TypeV128 - i.u1 = lo - i.u2 = hi - return i -} - -// AsVbnot initializes this instruction as a vector negation instruction with OpcodeVbnot. -func (i *Instruction) AsVbnot(v Value) *Instruction { - i.opcode = OpcodeVbnot - i.typ = TypeV128 - i.v = v - return i -} - -// AsVband initializes this instruction as an and vector instruction with OpcodeVband. -func (i *Instruction) AsVband(x, y Value) *Instruction { - i.opcode = OpcodeVband - i.typ = TypeV128 - i.v = x - i.v2 = y - return i -} - -// AsVbor initializes this instruction as an or vector instruction with OpcodeVbor. -func (i *Instruction) AsVbor(x, y Value) *Instruction { - i.opcode = OpcodeVbor - i.typ = TypeV128 - i.v = x - i.v2 = y - return i -} - -// AsVbxor initializes this instruction as a xor vector instruction with OpcodeVbxor. -func (i *Instruction) AsVbxor(x, y Value) *Instruction { - i.opcode = OpcodeVbxor - i.typ = TypeV128 - i.v = x - i.v2 = y - return i -} - -// AsVbandnot initializes this instruction as an and-not vector instruction with OpcodeVbandnot. -func (i *Instruction) AsVbandnot(x, y Value) *Instruction { - i.opcode = OpcodeVbandnot - i.typ = TypeV128 - i.v = x - i.v2 = y - return i -} - -// AsVbitselect initializes this instruction as a bit select vector instruction with OpcodeVbitselect. -func (i *Instruction) AsVbitselect(c, x, y Value) *Instruction { - i.opcode = OpcodeVbitselect - i.typ = TypeV128 - i.v = c - i.v2 = x - i.v3 = y - return i -} - -// AsVanyTrue initializes this instruction as an anyTrue vector instruction with OpcodeVanyTrue. -func (i *Instruction) AsVanyTrue(x Value) *Instruction { - i.opcode = OpcodeVanyTrue - i.typ = TypeI32 - i.v = x - return i -} - -// AsVallTrue initializes this instruction as an allTrue vector instruction with OpcodeVallTrue. -func (i *Instruction) AsVallTrue(x Value, lane VecLane) *Instruction { - i.opcode = OpcodeVallTrue - i.typ = TypeI32 - i.v = x - i.u1 = uint64(lane) - return i -} - -// AsVhighBits initializes this instruction as a highBits vector instruction with OpcodeVhighBits. -func (i *Instruction) AsVhighBits(x Value, lane VecLane) *Instruction { - i.opcode = OpcodeVhighBits - i.typ = TypeI32 - i.v = x - i.u1 = uint64(lane) - return i -} - -// VconstData returns the operands of this vector constant instruction. -func (i *Instruction) VconstData() (lo, hi uint64) { - return i.u1, i.u2 -} - -// AsReturn initializes this instruction as a return instruction with OpcodeReturn. -func (i *Instruction) AsReturn(vs wazevoapi.VarLength[Value]) *Instruction { - i.opcode = OpcodeReturn - i.vs = vs - return i -} - -// AsIreduce initializes this instruction as a reduction instruction with OpcodeIreduce. 
-func (i *Instruction) AsIreduce(v Value, dstType Type) *Instruction { - i.opcode = OpcodeIreduce - i.v = v - i.typ = dstType - return i -} - -// AsWiden initializes this instruction as a signed or unsigned widen instruction -// on low half or high half of the given vector with OpcodeSwidenLow, OpcodeUwidenLow, OpcodeSwidenHigh, OpcodeUwidenHigh. -func (i *Instruction) AsWiden(v Value, lane VecLane, signed, low bool) *Instruction { - switch { - case signed && low: - i.opcode = OpcodeSwidenLow - case !signed && low: - i.opcode = OpcodeUwidenLow - case signed && !low: - i.opcode = OpcodeSwidenHigh - case !signed && !low: - i.opcode = OpcodeUwidenHigh - } - i.v = v - i.u1 = uint64(lane) - return i -} - -// AsAtomicLoad initializes this instruction as an atomic load. -// The size is in bytes and must be 1, 2, 4, or 8. -func (i *Instruction) AsAtomicLoad(addr Value, size uint64, typ Type) *Instruction { - i.opcode = OpcodeAtomicLoad - i.u1 = size - i.v = addr - i.typ = typ - return i -} - -// AsAtomicStore initializes this instruction as an atomic store. -// The size is in bytes and must be 1, 2, 4, or 8. -func (i *Instruction) AsAtomicStore(addr, val Value, size uint64) *Instruction { - i.opcode = OpcodeAtomicStore - i.u1 = size - i.v = addr - i.v2 = val - i.typ = val.Type() - return i -} - -// AsAtomicRmw initializes this instruction as an atomic read-modify-write. -// The size is in bytes and must be 1, 2, 4, or 8. -func (i *Instruction) AsAtomicRmw(op AtomicRmwOp, addr, val Value, size uint64) *Instruction { - i.opcode = OpcodeAtomicRmw - i.u1 = uint64(op) - i.u2 = size - i.v = addr - i.v2 = val - i.typ = val.Type() - return i -} - -// AsAtomicCas initializes this instruction as an atomic compare-and-swap. -// The size is in bytes and must be 1, 2, 4, or 8. -func (i *Instruction) AsAtomicCas(addr, exp, repl Value, size uint64) *Instruction { - i.opcode = OpcodeAtomicCas - i.u1 = size - i.v = addr - i.v2 = exp - i.v3 = repl - i.typ = repl.Type() - return i -} - -// AsFence initializes this instruction as a memory fence. -// A single byte immediate may be used to indicate fence ordering in the future -// but is currently always 0 and ignored. -func (i *Instruction) AsFence(order byte) *Instruction { - i.opcode = OpcodeFence - i.u1 = uint64(order) - return i -} - -// AtomicRmwData returns the data for this atomic read-modify-write instruction. -func (i *Instruction) AtomicRmwData() (op AtomicRmwOp, size uint64) { - return AtomicRmwOp(i.u1), i.u2 -} - -// AtomicTargetSize returns the target memory size of the atomic instruction. -func (i *Instruction) AtomicTargetSize() (size uint64) { - return i.u1 -} - -// ReturnVals returns the return values of OpcodeReturn. -func (i *Instruction) ReturnVals() []Value { - return i.vs.View() -} - -// AsExitWithCode initializes this instruction as a trap instruction with OpcodeExitWithCode. -func (i *Instruction) AsExitWithCode(ctx Value, code wazevoapi.ExitCode) { - i.opcode = OpcodeExitWithCode - i.v = ctx - i.u1 = uint64(code) -} - -// AsExitIfTrueWithCode initializes this instruction as a trap instruction with OpcodeExitIfTrueWithCode. -func (i *Instruction) AsExitIfTrueWithCode(ctx, c Value, code wazevoapi.ExitCode) *Instruction { - i.opcode = OpcodeExitIfTrueWithCode - i.v = ctx - i.v2 = c - i.u1 = uint64(code) - return i -} - -// ExitWithCodeData returns the context and exit code of OpcodeExitWithCode.
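A brief usage note on the atomic helpers above, before the exit-code accessors that follow: the RMW form keeps its operation in u1 and the access size (in bytes) in u2, while the load/store/CAS forms keep the size in u1, which is what AtomicTargetSize reads back. The fragment below is a hedged illustration; `b`, `addr` and `val` are assumed to come from the surrounding SSA builder, and the AtomicRmwOpAdd constant name is an assumption since the op constants are not part of this hunk.

    rmw := b.AllocateInstruction().AsAtomicRmw(AtomicRmwOpAdd, addr, val, 4) // 32-bit atomic add
    op, size := rmw.AtomicRmwData()                                          // op back from u1, size (4) from u2
    _, _ = op, size

    st := b.AllocateInstruction().AsAtomicStore(addr, val, 8) // 64-bit atomic store; size lives in u1
    _ = st.AtomicTargetSize()                                 // -> 8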
-func (i *Instruction) ExitWithCodeData() (ctx Value, code wazevoapi.ExitCode) { - return i.v, wazevoapi.ExitCode(i.u1) -} - -// ExitIfTrueWithCodeData returns the context, condition, and exit code of OpcodeExitIfTrueWithCode. -func (i *Instruction) ExitIfTrueWithCodeData() (ctx, c Value, code wazevoapi.ExitCode) { - return i.v, i.v2, wazevoapi.ExitCode(i.u1) -} - -// InvertBrx inverts either OpcodeBrz or OpcodeBrnz to the other. -func (i *Instruction) InvertBrx() { - switch i.opcode { - case OpcodeBrz: - i.opcode = OpcodeBrnz - case OpcodeBrnz: - i.opcode = OpcodeBrz - default: - panic("BUG") - } -} - -// BranchData returns the branch data for this instruction necessary for backends. -func (i *Instruction) BranchData() (condVal Value, blockArgs []Value, target BasicBlockID) { - switch i.opcode { - case OpcodeJump: - condVal = ValueInvalid - case OpcodeBrz, OpcodeBrnz: - condVal = i.v - default: - panic("BUG") - } - blockArgs = i.vs.View() - target = BasicBlockID(i.rValue) - return -} - -// BrTableData returns the branch table data for this instruction necessary for backends. -func (i *Instruction) BrTableData() (index Value, targets Values) { - if i.opcode != OpcodeBrTable { - panic("BUG: BrTableData only available for OpcodeBrTable") - } - index = i.v - targets = i.rValues - return -} - -// AsJump initializes this instruction as a jump instruction with OpcodeJump. -func (i *Instruction) AsJump(vs Values, target BasicBlock) *Instruction { - i.opcode = OpcodeJump - i.vs = vs - i.rValue = Value(target.ID()) - return i -} - -// IsFallthroughJump returns true if this instruction is a fallthrough jump. -func (i *Instruction) IsFallthroughJump() bool { - if i.opcode != OpcodeJump { - panic("BUG: IsFallthrough only available for OpcodeJump") - } - return i.opcode == OpcodeJump && i.u1 != 0 -} - -// AsFallthroughJump marks this instruction as a fallthrough jump. -func (i *Instruction) AsFallthroughJump() { - if i.opcode != OpcodeJump { - panic("BUG: AsFallthroughJump only available for OpcodeJump") - } - i.u1 = 1 -} - -// AsBrz initializes this instruction as a branch-if-zero instruction with OpcodeBrz. -func (i *Instruction) AsBrz(v Value, args Values, target BasicBlock) { - i.opcode = OpcodeBrz - i.v = v - i.vs = args - i.rValue = Value(target.ID()) -} - -// AsBrnz initializes this instruction as a branch-if-not-zero instruction with OpcodeBrnz. -func (i *Instruction) AsBrnz(v Value, args Values, target BasicBlock) *Instruction { - i.opcode = OpcodeBrnz - i.v = v - i.vs = args - i.rValue = Value(target.ID()) - return i -} - -// AsBrTable initializes this instruction as a branch-table instruction with OpcodeBrTable. -// targets is a list of basic block IDs cast to Values. -func (i *Instruction) AsBrTable(index Value, targets Values) { - i.opcode = OpcodeBrTable - i.v = index - i.rValues = targets -} - -// AsCall initializes this instruction as a call instruction with OpcodeCall. -func (i *Instruction) AsCall(ref FuncRef, sig *Signature, args Values) { - i.opcode = OpcodeCall - i.u1 = uint64(ref) - i.vs = args - i.u2 = uint64(sig.ID) - sig.used = true -} - -// CallData returns the call data for this instruction necessary for backends. -func (i *Instruction) CallData() (ref FuncRef, sigID SignatureID, args []Value) { - if i.opcode != OpcodeCall { - panic("BUG: CallData only available for OpcodeCall") - } - ref = FuncRef(i.u1) - sigID = SignatureID(i.u2) - args = i.vs.View() - return -} - -// AsCallIndirect initializes this instruction as a call-indirect instruction with OpcodeCallIndirect.
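As an aside on the branch constructors above, before the call-indirect constructor that follows: AsJump, AsBrz and AsBrnz all stash the target's BasicBlockID in rValue, which BranchData later decodes for the backend. A hedged sketch, assuming a builder `b`, a condition `cond`, an argument list `args`, and a target block `blk` already exist in the surrounding lowering code:

    br := b.AllocateInstruction().AsBrnz(cond, args, blk)
    condVal, blockArgs, target := br.BranchData()
    // condVal == cond, blockArgs == args.View(), target == blk.ID()
    _, _, _ = condVal, blockArgs, target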
-func (i *Instruction) AsCallIndirect(funcPtr Value, sig *Signature, args Values) *Instruction { - i.opcode = OpcodeCallIndirect - i.typ = TypeF64 - i.vs = args - i.v = funcPtr - i.u1 = uint64(sig.ID) - sig.used = true - return i -} - -// AsCallGoRuntimeMemmove is the same as AsCallIndirect, but with a special flag set to indicate that it is a call to the Go runtime memmove function. -func (i *Instruction) AsCallGoRuntimeMemmove(funcPtr Value, sig *Signature, args Values) *Instruction { - i.AsCallIndirect(funcPtr, sig, args) - i.u2 = 1 - return i -} - -// CallIndirectData returns the call indirect data for this instruction necessary for backends. -func (i *Instruction) CallIndirectData() (funcPtr Value, sigID SignatureID, args []Value, isGoMemmove bool) { - if i.opcode != OpcodeCallIndirect { - panic("BUG: CallIndirectData only available for OpcodeCallIndirect") - } - funcPtr = i.v - sigID = SignatureID(i.u1) - args = i.vs.View() - isGoMemmove = i.u2 == 1 - return -} - -// AsClz initializes this instruction as a Count Leading Zeroes instruction with OpcodeClz. -func (i *Instruction) AsClz(x Value) { - i.opcode = OpcodeClz - i.v = x - i.typ = x.Type() -} - -// AsCtz initializes this instruction as a Count Trailing Zeroes instruction with OpcodeCtz. -func (i *Instruction) AsCtz(x Value) { - i.opcode = OpcodeCtz - i.v = x - i.typ = x.Type() -} - -// AsPopcnt initializes this instruction as a Population Count instruction with OpcodePopcnt. -func (i *Instruction) AsPopcnt(x Value) { - i.opcode = OpcodePopcnt - i.v = x - i.typ = x.Type() -} - -// AsFneg initializes this instruction as an instruction with OpcodeFneg. -func (i *Instruction) AsFneg(x Value) *Instruction { - i.opcode = OpcodeFneg - i.v = x - i.typ = x.Type() - return i -} - -// AsSqrt initializes this instruction as an instruction with OpcodeSqrt. -func (i *Instruction) AsSqrt(x Value) *Instruction { - i.opcode = OpcodeSqrt - i.v = x - i.typ = x.Type() - return i -} - -// AsFabs initializes this instruction as an instruction with OpcodeFabs. -func (i *Instruction) AsFabs(x Value) *Instruction { - i.opcode = OpcodeFabs - i.v = x - i.typ = x.Type() - return i -} - -// AsFcopysign initializes this instruction as an instruction with OpcodeFcopysign. -func (i *Instruction) AsFcopysign(x, y Value) *Instruction { - i.opcode = OpcodeFcopysign - i.v = x - i.v2 = y - i.typ = x.Type() - return i -} - -// AsCeil initializes this instruction as an instruction with OpcodeCeil. -func (i *Instruction) AsCeil(x Value) *Instruction { - i.opcode = OpcodeCeil - i.v = x - i.typ = x.Type() - return i -} - -// AsFloor initializes this instruction as an instruction with OpcodeFloor. -func (i *Instruction) AsFloor(x Value) *Instruction { - i.opcode = OpcodeFloor - i.v = x - i.typ = x.Type() - return i -} - -// AsTrunc initializes this instruction as an instruction with OpcodeTrunc. -func (i *Instruction) AsTrunc(x Value) *Instruction { - i.opcode = OpcodeTrunc - i.v = x - i.typ = x.Type() - return i -} - -// AsNearest initializes this instruction as an instruction with OpcodeNearest. -func (i *Instruction) AsNearest(x Value) *Instruction { - i.opcode = OpcodeNearest - i.v = x - i.typ = x.Type() - return i -} - -// AsBitcast initializes this instruction as an instruction with OpcodeBitcast. -func (i *Instruction) AsBitcast(x Value, dstType Type) *Instruction { - i.opcode = OpcodeBitcast - i.v = x - i.typ = dstType - return i -} - -// BitcastData returns the operands for a bitcast instruction. 
-func (i *Instruction) BitcastData() (x Value, dstType Type) { - return i.v, i.typ -} - -// AsFdemote initializes this instruction as an instruction with OpcodeFdemote. -func (i *Instruction) AsFdemote(x Value) { - i.opcode = OpcodeFdemote - i.v = x - i.typ = TypeF32 -} - -// AsFpromote initializes this instruction as an instruction with OpcodeFpromote. -func (i *Instruction) AsFpromote(x Value) { - i.opcode = OpcodeFpromote - i.v = x - i.typ = TypeF64 -} - -// AsFcvtFromInt initializes this instruction as an instruction with either OpcodeFcvtFromUint or OpcodeFcvtFromSint -func (i *Instruction) AsFcvtFromInt(x Value, signed bool, dst64bit bool) *Instruction { - if signed { - i.opcode = OpcodeFcvtFromSint - } else { - i.opcode = OpcodeFcvtFromUint - } - i.v = x - if dst64bit { - i.typ = TypeF64 - } else { - i.typ = TypeF32 - } - return i -} - -// AsFcvtToInt initializes this instruction as an instruction with either OpcodeFcvtToUint or OpcodeFcvtToSint -func (i *Instruction) AsFcvtToInt(x, ctx Value, signed bool, dst64bit bool, sat bool) *Instruction { - switch { - case signed && !sat: - i.opcode = OpcodeFcvtToSint - case !signed && !sat: - i.opcode = OpcodeFcvtToUint - case signed && sat: - i.opcode = OpcodeFcvtToSintSat - case !signed && sat: - i.opcode = OpcodeFcvtToUintSat - } - i.v = x - i.v2 = ctx - if dst64bit { - i.typ = TypeI64 - } else { - i.typ = TypeI32 - } - return i -} - -// AsVFcvtToIntSat initializes this instruction as an instruction with either OpcodeVFcvtToSintSat or OpcodeVFcvtToUintSat -func (i *Instruction) AsVFcvtToIntSat(x Value, lane VecLane, signed bool) *Instruction { - if signed { - i.opcode = OpcodeVFcvtToSintSat - } else { - i.opcode = OpcodeVFcvtToUintSat - } - i.v = x - i.u1 = uint64(lane) - return i -} - -// AsVFcvtFromInt initializes this instruction as an instruction with either OpcodeVFcvtFromSint or OpcodeVFcvtFromUint -func (i *Instruction) AsVFcvtFromInt(x Value, lane VecLane, signed bool) *Instruction { - if signed { - i.opcode = OpcodeVFcvtFromSint - } else { - i.opcode = OpcodeVFcvtFromUint - } - i.v = x - i.u1 = uint64(lane) - return i -} - -// AsNarrow initializes this instruction as an instruction with either OpcodeSnarrow or OpcodeUnarrow -func (i *Instruction) AsNarrow(x, y Value, lane VecLane, signed bool) *Instruction { - if signed { - i.opcode = OpcodeSnarrow - } else { - i.opcode = OpcodeUnarrow - } - i.v = x - i.v2 = y - i.u1 = uint64(lane) - return i -} - -// AsFvpromoteLow initializes this instruction as an instruction with OpcodeFvpromoteLow -func (i *Instruction) AsFvpromoteLow(x Value, lane VecLane) *Instruction { - i.opcode = OpcodeFvpromoteLow - i.v = x - i.u1 = uint64(lane) - return i -} - -// AsFvdemote initializes this instruction as an instruction with OpcodeFvdemote -func (i *Instruction) AsFvdemote(x Value, lane VecLane) *Instruction { - i.opcode = OpcodeFvdemote - i.v = x - i.u1 = uint64(lane) - return i -} - -// AsSExtend initializes this instruction as a sign extension instruction with OpcodeSExtend. -func (i *Instruction) AsSExtend(v Value, from, to byte) *Instruction { - i.opcode = OpcodeSExtend - i.v = v - i.u1 = uint64(from)<<8 | uint64(to) - if to == 64 { - i.typ = TypeI64 - } else { - i.typ = TypeI32 - } - return i -} - -// AsUExtend initializes this instruction as an unsigned extension instruction with OpcodeUExtend.
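A note on the width encoding used by AsSExtend above and AsUExtend just below: the source and destination widths are packed into u1 as from<<8|to, so a 32-to-64-bit extension stores 0x2040 and the result type becomes TypeI64. A small sketch, with `b` and `v` assumed to come from the surrounding builder code:

    ext := b.AllocateInstruction().AsSExtend(v, 32, 64) // u1 = 32<<8|64 = 0x2040
    from, to := ext.ExtendFromToBits()                  // 32, 64; ExtendData additionally reports signed=true
    _, _ = from, to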
-func (i *Instruction) AsUExtend(v Value, from, to byte) *Instruction { - i.opcode = OpcodeUExtend - i.v = v - i.u1 = uint64(from)<<8 | uint64(to) - if to == 64 { - i.typ = TypeI64 - } else { - i.typ = TypeI32 - } - return i -} - -func (i *Instruction) ExtendData() (from, to byte, signed bool) { - if i.opcode != OpcodeSExtend && i.opcode != OpcodeUExtend { - panic("BUG: ExtendData only available for OpcodeSExtend and OpcodeUExtend") - } - from = byte(i.u1 >> 8) - to = byte(i.u1) - signed = i.opcode == OpcodeSExtend - return -} - -// AsSelect initializes this instruction as a select instruction with OpcodeSelect. -func (i *Instruction) AsSelect(c, x, y Value) *Instruction { - i.opcode = OpcodeSelect - i.v = c - i.v2 = x - i.v3 = y - i.typ = x.Type() - return i -} - -// SelectData returns the select data for this instruction necessary for backends. -func (i *Instruction) SelectData() (c, x, y Value) { - c = i.v - x = i.v2 - y = i.v3 - return -} - -// ExtendFromToBits returns the from and to bit size for the extension instruction. -func (i *Instruction) ExtendFromToBits() (from, to byte) { - from = byte(i.u1 >> 8) - to = byte(i.u1) - return -} - -// Format returns a string representation of this instruction with the given builder. -// For debugging purposes only. -func (i *Instruction) Format(b Builder) string { - var instSuffix string - switch i.opcode { - case OpcodeExitWithCode: - instSuffix = fmt.Sprintf(" %s, %s", i.v.Format(b), wazevoapi.ExitCode(i.u1)) - case OpcodeExitIfTrueWithCode: - instSuffix = fmt.Sprintf(" %s, %s, %s", i.v2.Format(b), i.v.Format(b), wazevoapi.ExitCode(i.u1)) - case OpcodeIadd, OpcodeIsub, OpcodeImul, OpcodeFadd, OpcodeFsub, OpcodeFmin, OpcodeFmax, OpcodeFdiv, OpcodeFmul: - instSuffix = fmt.Sprintf(" %s, %s", i.v.Format(b), i.v2.Format(b)) - case OpcodeIcmp: - instSuffix = fmt.Sprintf(" %s, %s, %s", IntegerCmpCond(i.u1), i.v.Format(b), i.v2.Format(b)) - case OpcodeFcmp: - instSuffix = fmt.Sprintf(" %s, %s, %s", FloatCmpCond(i.u1), i.v.Format(b), i.v2.Format(b)) - case OpcodeSExtend, OpcodeUExtend: - instSuffix = fmt.Sprintf(" %s, %d->%d", i.v.Format(b), i.u1>>8, i.u1&0xff) - case OpcodeCall, OpcodeCallIndirect: - view := i.vs.View() - vs := make([]string, len(view)) - for idx := range vs { - vs[idx] = view[idx].Format(b) - } - if i.opcode == OpcodeCallIndirect { - instSuffix = fmt.Sprintf(" %s:%s, %s", i.v.Format(b), SignatureID(i.u1), strings.Join(vs, ", ")) - } else { - instSuffix = fmt.Sprintf(" %s:%s, %s", FuncRef(i.u1), SignatureID(i.u2), strings.Join(vs, ", ")) - } - case OpcodeStore, OpcodeIstore8, OpcodeIstore16, OpcodeIstore32: - instSuffix = fmt.Sprintf(" %s, %s, %#x", i.v.Format(b), i.v2.Format(b), uint32(i.u1)) - case OpcodeLoad, OpcodeVZeroExtLoad: - instSuffix = fmt.Sprintf(" %s, %#x", i.v.Format(b), int32(i.u1)) - case OpcodeLoadSplat: - instSuffix = fmt.Sprintf(".%s %s, %#x", VecLane(i.u2), i.v.Format(b), int32(i.u1)) - case OpcodeUload8, OpcodeUload16, OpcodeUload32, OpcodeSload8, OpcodeSload16, OpcodeSload32: - instSuffix = fmt.Sprintf(" %s, %#x", i.v.Format(b), int32(i.u1)) - case OpcodeSelect, OpcodeVbitselect: - instSuffix = fmt.Sprintf(" %s, %s, %s", i.v.Format(b), i.v2.Format(b), i.v3.Format(b)) - case OpcodeIconst: - switch i.typ { - case TypeI32: - instSuffix = fmt.Sprintf("_32 %#x", uint32(i.u1)) - case TypeI64: - instSuffix = fmt.Sprintf("_64 %#x", i.u1) - } - case OpcodeVconst: - instSuffix = fmt.Sprintf(" %016x %016x", i.u1, i.u2) - case OpcodeF32const: - instSuffix = fmt.Sprintf(" %f", math.Float32frombits(uint32(i.u1)))
- case OpcodeF64const: - instSuffix = fmt.Sprintf(" %f", math.Float64frombits(i.u1)) - case OpcodeReturn: - view := i.vs.View() - if len(view) == 0 { - break - } - vs := make([]string, len(view)) - for idx := range vs { - vs[idx] = view[idx].Format(b) - } - instSuffix = fmt.Sprintf(" %s", strings.Join(vs, ", ")) - case OpcodeJump: - view := i.vs.View() - vs := make([]string, len(view)+1) - if i.IsFallthroughJump() { - vs[0] = " fallthrough" - } else { - blockId := BasicBlockID(i.rValue) - vs[0] = " " + b.BasicBlock(blockId).Name() - } - for idx := range view { - vs[idx+1] = view[idx].Format(b) - } - - instSuffix = strings.Join(vs, ", ") - case OpcodeBrz, OpcodeBrnz: - view := i.vs.View() - vs := make([]string, len(view)+2) - vs[0] = " " + i.v.Format(b) - blockId := BasicBlockID(i.rValue) - vs[1] = b.BasicBlock(blockId).Name() - for idx := range view { - vs[idx+2] = view[idx].Format(b) - } - instSuffix = strings.Join(vs, ", ") - case OpcodeBrTable: - // `BrTable index, [label1, label2, ... labelN]` - instSuffix = fmt.Sprintf(" %s", i.v.Format(b)) - instSuffix += ", [" - for i, target := range i.rValues.View() { - blk := b.BasicBlock(BasicBlockID(target)) - if i == 0 { - instSuffix += blk.Name() - } else { - instSuffix += ", " + blk.Name() - } - } - instSuffix += "]" - case OpcodeBand, OpcodeBor, OpcodeBxor, OpcodeRotr, OpcodeRotl, OpcodeIshl, OpcodeSshr, OpcodeUshr, - OpcodeSdiv, OpcodeUdiv, OpcodeFcopysign, OpcodeSrem, OpcodeUrem, - OpcodeVbnot, OpcodeVbxor, OpcodeVbor, OpcodeVband, OpcodeVbandnot, OpcodeVIcmp, OpcodeVFcmp: - instSuffix = fmt.Sprintf(" %s, %s", i.v.Format(b), i.v2.Format(b)) - case OpcodeUndefined: - case OpcodeClz, OpcodeCtz, OpcodePopcnt, OpcodeFneg, OpcodeFcvtToSint, OpcodeFcvtToUint, OpcodeFcvtFromSint, - OpcodeFcvtFromUint, OpcodeFcvtToSintSat, OpcodeFcvtToUintSat, OpcodeFdemote, OpcodeFpromote, OpcodeIreduce, OpcodeBitcast, OpcodeSqrt, OpcodeFabs, - OpcodeCeil, OpcodeFloor, OpcodeTrunc, OpcodeNearest: - instSuffix = " " + i.v.Format(b) - case OpcodeVIadd, OpcodeExtIaddPairwise, OpcodeVSaddSat, OpcodeVUaddSat, OpcodeVIsub, OpcodeVSsubSat, OpcodeVUsubSat, - OpcodeVImin, OpcodeVUmin, OpcodeVImax, OpcodeVUmax, OpcodeVImul, OpcodeVAvgRound, - OpcodeVFadd, OpcodeVFsub, OpcodeVFmul, OpcodeVFdiv, - OpcodeVIshl, OpcodeVSshr, OpcodeVUshr, - OpcodeVFmin, OpcodeVFmax, OpcodeVMinPseudo, OpcodeVMaxPseudo, - OpcodeSnarrow, OpcodeUnarrow, OpcodeSwizzle, OpcodeSqmulRoundSat: - instSuffix = fmt.Sprintf(".%s %s, %s", VecLane(i.u1), i.v.Format(b), i.v2.Format(b)) - case OpcodeVIabs, OpcodeVIneg, OpcodeVIpopcnt, OpcodeVhighBits, OpcodeVallTrue, OpcodeVanyTrue, - OpcodeVFabs, OpcodeVFneg, OpcodeVSqrt, OpcodeVCeil, OpcodeVFloor, OpcodeVTrunc, OpcodeVNearest, - OpcodeVFcvtToUintSat, OpcodeVFcvtToSintSat, OpcodeVFcvtFromUint, OpcodeVFcvtFromSint, - OpcodeFvpromoteLow, OpcodeFvdemote, OpcodeSwidenLow, OpcodeUwidenLow, OpcodeSwidenHigh, OpcodeUwidenHigh, - OpcodeSplat: - instSuffix = fmt.Sprintf(".%s %s", VecLane(i.u1), i.v.Format(b)) - case OpcodeExtractlane: - var signedness string - if i.u1 != 0 { - signedness = "signed" - } else { - signedness = "unsigned" - } - instSuffix = fmt.Sprintf(".%s %d, %s (%s)", VecLane(i.u2), 0x0000FFFF&i.u1, i.v.Format(b), signedness) - case OpcodeInsertlane: - instSuffix = fmt.Sprintf(".%s %d, %s, %s", VecLane(i.u2), i.u1, i.v.Format(b), i.v2.Format(b)) - case OpcodeShuffle: - lanes := make([]byte, 16) - for idx := 0; idx < 8; idx++ { - lanes[idx] = byte(i.u1 >> (8 * idx)) - } - for idx := 0; idx < 8; idx++ { - lanes[idx+8] = byte(i.u2 >> (8 * idx)) - } - // 
Prints Shuffle.[0 1 2 3 4 5 6 7 ...] v2, v3 - instSuffix = fmt.Sprintf(".%v %s, %s", lanes, i.v.Format(b), i.v2.Format(b)) - case OpcodeAtomicRmw: - instSuffix = fmt.Sprintf(" %s_%d, %s, %s", AtomicRmwOp(i.u1), 8*i.u2, i.v.Format(b), i.v2.Format(b)) - case OpcodeAtomicLoad: - instSuffix = fmt.Sprintf("_%d, %s", 8*i.u1, i.v.Format(b)) - case OpcodeAtomicStore: - instSuffix = fmt.Sprintf("_%d, %s, %s", 8*i.u1, i.v.Format(b), i.v2.Format(b)) - case OpcodeAtomicCas: - instSuffix = fmt.Sprintf("_%d, %s, %s, %s", 8*i.u1, i.v.Format(b), i.v2.Format(b), i.v3.Format(b)) - case OpcodeFence: - instSuffix = fmt.Sprintf(" %d", i.u1) - case OpcodeWideningPairwiseDotProductS: - instSuffix = fmt.Sprintf(" %s, %s", i.v.Format(b), i.v2.Format(b)) - default: - panic(fmt.Sprintf("TODO: format for %s", i.opcode)) - } - - instr := i.opcode.String() + instSuffix - - var rvs []string - r1, rs := i.Returns() - if r1.Valid() { - rvs = append(rvs, r1.formatWithType(b)) - } - - for _, v := range rs { - rvs = append(rvs, v.formatWithType(b)) - } - - if len(rvs) > 0 { - return fmt.Sprintf("%s = %s", strings.Join(rvs, ", "), instr) - } else { - return instr - } -} - -// addArgumentBranchInst adds an argument to this instruction. -func (i *Instruction) addArgumentBranchInst(b *builder, v Value) { - switch i.opcode { - case OpcodeJump, OpcodeBrz, OpcodeBrnz: - i.vs = i.vs.Append(&b.varLengthPool, v) - default: - panic("BUG: " + i.opcode.String()) - } -} - -// Constant returns true if this instruction is a constant instruction. -func (i *Instruction) Constant() bool { - switch i.opcode { - case OpcodeIconst, OpcodeF32const, OpcodeF64const: - return true - } - return false -} - -// ConstantVal returns the constant value of this instruction. -// How to interpret the return value depends on the opcode. -func (i *Instruction) ConstantVal() (ret uint64) { - switch i.opcode { - case OpcodeIconst, OpcodeF32const, OpcodeF64const: - ret = i.u1 - default: - panic("TODO") - } - return -} - -// String implements fmt.Stringer. 
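One more illustration before the Opcode stringer: ConstantVal above returns the raw u1 payload, so its interpretation depends on the opcode; for F32const the payload is the IEEE-754 bit pattern written by AsF32const. A hedged fragment, assuming a builder `b` and the "math" import in scope:

    f := b.AllocateInstruction().AsF32const(1.5)
    bits := f.ConstantVal()                   // raw payload, i.e. math.Float32bits(1.5)
    val := math.Float32frombits(uint32(bits)) // back to 1.5
    _ = val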
-func (o Opcode) String() (ret string) { - switch o { - case OpcodeInvalid: - return "invalid" - case OpcodeUndefined: - return "Undefined" - case OpcodeJump: - return "Jump" - case OpcodeBrz: - return "Brz" - case OpcodeBrnz: - return "Brnz" - case OpcodeBrTable: - return "BrTable" - case OpcodeExitWithCode: - return "Exit" - case OpcodeExitIfTrueWithCode: - return "ExitIfTrue" - case OpcodeReturn: - return "Return" - case OpcodeCall: - return "Call" - case OpcodeCallIndirect: - return "CallIndirect" - case OpcodeSplat: - return "Splat" - case OpcodeSwizzle: - return "Swizzle" - case OpcodeInsertlane: - return "Insertlane" - case OpcodeExtractlane: - return "Extractlane" - case OpcodeLoad: - return "Load" - case OpcodeLoadSplat: - return "LoadSplat" - case OpcodeStore: - return "Store" - case OpcodeUload8: - return "Uload8" - case OpcodeSload8: - return "Sload8" - case OpcodeIstore8: - return "Istore8" - case OpcodeUload16: - return "Uload16" - case OpcodeSload16: - return "Sload16" - case OpcodeIstore16: - return "Istore16" - case OpcodeUload32: - return "Uload32" - case OpcodeSload32: - return "Sload32" - case OpcodeIstore32: - return "Istore32" - case OpcodeIconst: - return "Iconst" - case OpcodeF32const: - return "F32const" - case OpcodeF64const: - return "F64const" - case OpcodeVconst: - return "Vconst" - case OpcodeShuffle: - return "Shuffle" - case OpcodeSelect: - return "Select" - case OpcodeVanyTrue: - return "VanyTrue" - case OpcodeVallTrue: - return "VallTrue" - case OpcodeVhighBits: - return "VhighBits" - case OpcodeIcmp: - return "Icmp" - case OpcodeIcmpImm: - return "IcmpImm" - case OpcodeVIcmp: - return "VIcmp" - case OpcodeIadd: - return "Iadd" - case OpcodeIsub: - return "Isub" - case OpcodeImul: - return "Imul" - case OpcodeUdiv: - return "Udiv" - case OpcodeSdiv: - return "Sdiv" - case OpcodeUrem: - return "Urem" - case OpcodeSrem: - return "Srem" - case OpcodeBand: - return "Band" - case OpcodeBor: - return "Bor" - case OpcodeBxor: - return "Bxor" - case OpcodeBnot: - return "Bnot" - case OpcodeRotl: - return "Rotl" - case OpcodeRotr: - return "Rotr" - case OpcodeIshl: - return "Ishl" - case OpcodeUshr: - return "Ushr" - case OpcodeSshr: - return "Sshr" - case OpcodeClz: - return "Clz" - case OpcodeCtz: - return "Ctz" - case OpcodePopcnt: - return "Popcnt" - case OpcodeFcmp: - return "Fcmp" - case OpcodeFadd: - return "Fadd" - case OpcodeFsub: - return "Fsub" - case OpcodeFmul: - return "Fmul" - case OpcodeFdiv: - return "Fdiv" - case OpcodeSqmulRoundSat: - return "SqmulRoundSat" - case OpcodeSqrt: - return "Sqrt" - case OpcodeFneg: - return "Fneg" - case OpcodeFabs: - return "Fabs" - case OpcodeFcopysign: - return "Fcopysign" - case OpcodeFmin: - return "Fmin" - case OpcodeFmax: - return "Fmax" - case OpcodeCeil: - return "Ceil" - case OpcodeFloor: - return "Floor" - case OpcodeTrunc: - return "Trunc" - case OpcodeNearest: - return "Nearest" - case OpcodeBitcast: - return "Bitcast" - case OpcodeIreduce: - return "Ireduce" - case OpcodeSnarrow: - return "Snarrow" - case OpcodeUnarrow: - return "Unarrow" - case OpcodeSwidenLow: - return "SwidenLow" - case OpcodeSwidenHigh: - return "SwidenHigh" - case OpcodeUwidenLow: - return "UwidenLow" - case OpcodeUwidenHigh: - return "UwidenHigh" - case OpcodeExtIaddPairwise: - return "IaddPairwise" - case OpcodeWideningPairwiseDotProductS: - return "WideningPairwiseDotProductS" - case OpcodeUExtend: - return "UExtend" - case OpcodeSExtend: - return "SExtend" - case OpcodeFpromote: - return "Fpromote" - case OpcodeFdemote: - return 
"Fdemote" - case OpcodeFvdemote: - return "Fvdemote" - case OpcodeFcvtToUint: - return "FcvtToUint" - case OpcodeFcvtToSint: - return "FcvtToSint" - case OpcodeFcvtToUintSat: - return "FcvtToUintSat" - case OpcodeFcvtToSintSat: - return "FcvtToSintSat" - case OpcodeFcvtFromUint: - return "FcvtFromUint" - case OpcodeFcvtFromSint: - return "FcvtFromSint" - case OpcodeAtomicRmw: - return "AtomicRmw" - case OpcodeAtomicCas: - return "AtomicCas" - case OpcodeAtomicLoad: - return "AtomicLoad" - case OpcodeAtomicStore: - return "AtomicStore" - case OpcodeFence: - return "Fence" - case OpcodeVbor: - return "Vbor" - case OpcodeVbxor: - return "Vbxor" - case OpcodeVband: - return "Vband" - case OpcodeVbandnot: - return "Vbandnot" - case OpcodeVbnot: - return "Vbnot" - case OpcodeVbitselect: - return "Vbitselect" - case OpcodeVIadd: - return "VIadd" - case OpcodeVSaddSat: - return "VSaddSat" - case OpcodeVUaddSat: - return "VUaddSat" - case OpcodeVSsubSat: - return "VSsubSat" - case OpcodeVUsubSat: - return "VUsubSat" - case OpcodeVAvgRound: - return "OpcodeVAvgRound" - case OpcodeVIsub: - return "VIsub" - case OpcodeVImin: - return "VImin" - case OpcodeVUmin: - return "VUmin" - case OpcodeVImax: - return "VImax" - case OpcodeVUmax: - return "VUmax" - case OpcodeVImul: - return "VImul" - case OpcodeVIabs: - return "VIabs" - case OpcodeVIneg: - return "VIneg" - case OpcodeVIpopcnt: - return "VIpopcnt" - case OpcodeVIshl: - return "VIshl" - case OpcodeVUshr: - return "VUshr" - case OpcodeVSshr: - return "VSshr" - case OpcodeVFabs: - return "VFabs" - case OpcodeVFmax: - return "VFmax" - case OpcodeVFmin: - return "VFmin" - case OpcodeVFneg: - return "VFneg" - case OpcodeVFadd: - return "VFadd" - case OpcodeVFsub: - return "VFsub" - case OpcodeVFmul: - return "VFmul" - case OpcodeVFdiv: - return "VFdiv" - case OpcodeVFcmp: - return "VFcmp" - case OpcodeVCeil: - return "VCeil" - case OpcodeVFloor: - return "VFloor" - case OpcodeVTrunc: - return "VTrunc" - case OpcodeVNearest: - return "VNearest" - case OpcodeVMaxPseudo: - return "VMaxPseudo" - case OpcodeVMinPseudo: - return "VMinPseudo" - case OpcodeVSqrt: - return "VSqrt" - case OpcodeVFcvtToUintSat: - return "VFcvtToUintSat" - case OpcodeVFcvtToSintSat: - return "VFcvtToSintSat" - case OpcodeVFcvtFromUint: - return "VFcvtFromUint" - case OpcodeVFcvtFromSint: - return "VFcvtFromSint" - case OpcodeFvpromoteLow: - return "FvpromoteLow" - case OpcodeVZeroExtLoad: - return "VZeroExtLoad" - } - panic(fmt.Sprintf("unknown opcode %d", o)) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go deleted file mode 100644 index b9763791d..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go +++ /dev/null @@ -1,393 +0,0 @@ -package ssa - -import ( - "fmt" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -// RunPasses implements Builder.RunPasses. -// -// The order here matters; some pass depends on the previous ones. -// -// Note that passes suffixed with "Opt" are the optimization passes, meaning that they edit the instructions and blocks -// while the other passes are not, like passEstimateBranchProbabilities does not edit them, but only calculates the additional information. 
-func (b *builder) RunPasses() { - b.runPreBlockLayoutPasses() - b.runBlockLayoutPass() - b.runPostBlockLayoutPasses() - b.runFinalizingPasses() -} - -func (b *builder) runPreBlockLayoutPasses() { - passSortSuccessors(b) - passDeadBlockEliminationOpt(b) - // The result of passCalculateImmediateDominators will be used by various passes below. - passCalculateImmediateDominators(b) - passRedundantPhiEliminationOpt(b) - passNopInstElimination(b) - - // TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic. - // WebAssembly program shouldn't result in irreducible CFG, but we should handle it properly in just in case. - // See FixIrreducible pass in LLVM: https://llvm.org/doxygen/FixIrreducible_8cpp_source.html - - // TODO: implement more optimization passes like: - // block coalescing. - // Copy-propagation. - // Constant folding. - // Common subexpression elimination. - // Arithmetic simplifications. - // and more! - - // passDeadCodeEliminationOpt could be more accurate if we do this after other optimizations. - passDeadCodeEliminationOpt(b) - b.donePreBlockLayoutPasses = true -} - -func (b *builder) runBlockLayoutPass() { - if !b.donePreBlockLayoutPasses { - panic("runBlockLayoutPass must be called after all pre passes are done") - } - passLayoutBlocks(b) - b.doneBlockLayout = true -} - -// runPostBlockLayoutPasses runs the post block layout passes. After this point, CFG is somewhat stable, -// but still can be modified before finalizing passes. At this point, critical edges are split by passLayoutBlocks. -func (b *builder) runPostBlockLayoutPasses() { - if !b.doneBlockLayout { - panic("runPostBlockLayoutPasses must be called after block layout pass is done") - } - // TODO: Do more. e.g. tail duplication, loop unrolling, etc. - - b.donePostBlockLayoutPasses = true -} - -// runFinalizingPasses runs the finalizing passes. After this point, CFG should not be modified. -func (b *builder) runFinalizingPasses() { - if !b.donePostBlockLayoutPasses { - panic("runFinalizingPasses must be called after post block layout passes are done") - } - // Critical edges are split, so we fix the loop nesting forest. - passBuildLoopNestingForest(b) - passBuildDominatorTree(b) - // Now that we know the final placement of the blocks, we can explicitly mark the fallthrough jumps. - b.markFallthroughJumps() -} - -// passDeadBlockEliminationOpt searches the unreachable blocks, and sets the basicBlock.invalid flag true if so. -func passDeadBlockEliminationOpt(b *builder) { - entryBlk := b.entryBlk() - b.blkStack = append(b.blkStack, entryBlk) - for len(b.blkStack) > 0 { - reachableBlk := b.blkStack[len(b.blkStack)-1] - b.blkStack = b.blkStack[:len(b.blkStack)-1] - reachableBlk.visited = 1 - - if !reachableBlk.sealed && !reachableBlk.ReturnBlock() { - panic(fmt.Sprintf("%s is not sealed", reachableBlk)) - } - - if wazevoapi.SSAValidationEnabled { - reachableBlk.validate(b) - } - - for _, succ := range reachableBlk.success { - if succ.visited == 1 { - continue - } - b.blkStack = append(b.blkStack, succ) - } - } - - for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { - if blk.visited != 1 { - blk.invalid = true - } - blk.visited = 0 - } -} - -// passRedundantPhiEliminationOpt eliminates the redundant PHIs (in our terminology, parameters of a block). -// This requires the reverse post-order traversal to be calculated before calling this function, -// hence passCalculateImmediateDominators must be called before this. 
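Before the implementation below, a compact editorial sketch of the redundancy test it performs: a block parameter (PHI) is redundant when every predecessor passes either the parameter itself or one single other value, in which case the parameter can be aliased to that value and dropped. Names here are illustrative, not the builder's API; only Value, ValueInvalid and Valid come from the package.

    // isRedundantParam is a standalone restatement of the inner check of
    // passRedundantPhiEliminationOpt, for illustration only.
    func isRedundantParam(phi Value, incoming []Value) (unique Value, redundant bool) {
        unique = ValueInvalid
        for _, in := range incoming {
            if in == phi {
                continue // a self-reference never disqualifies the parameter
            }
            if !unique.Valid() {
                unique = in // first non-self incoming value becomes the candidate
                continue
            }
            if unique != in {
                return ValueInvalid, false // two distinct incoming values: the PHI is genuinely needed
            }
        }
        return unique, unique.Valid()
    }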
-func passRedundantPhiEliminationOpt(b *builder) { - redundantParams := b.redundantParams[:0] // reuse the slice from previous iterations. - - // TODO: this might be costly for large programs, but at least, as far as I did the experiment, it's almost the - // same as the single iteration version in terms of the overall compilation time. That *might be* mostly thanks to the fact - // that removing many PHIs results in the reduction of the total instructions, not because of this indefinite iteration is - // relatively small. For example, sqlite speedtest binary results in the large number of redundant PHIs, - // the maximum number of iteration was 22, which seems to be acceptable but not that small either since the - // complexity here is O(BlockNum * Iterations) at the worst case where BlockNum might be the order of thousands. - // -- Note -- - // Currently, each iteration can run in any order of blocks, but it empirically converges quickly in practice when - // running on the reverse post-order. It might be possible to optimize this further by using the dominator tree. - for { - changed := false - _ = b.blockIteratorReversePostOrderBegin() // skip entry block! - // Below, we intentionally use the named iteration variable name, as this comes with inevitable nested for loops! - for blk := b.blockIteratorReversePostOrderNext(); blk != nil; blk = b.blockIteratorReversePostOrderNext() { - params := blk.params.View() - paramNum := len(params) - - for paramIndex := 0; paramIndex < paramNum; paramIndex++ { - phiValue := params[paramIndex] - redundant := true - - nonSelfReferencingValue := ValueInvalid - for predIndex := range blk.preds { - br := blk.preds[predIndex].branch - // Resolve the alias in the arguments so that we could use the previous iteration's result. - b.resolveArgumentAlias(br) - pred := br.vs.View()[paramIndex] - if pred == phiValue { - // This is self-referencing: PHI from the same PHI. - continue - } - - if !nonSelfReferencingValue.Valid() { - nonSelfReferencingValue = pred - continue - } - - if nonSelfReferencingValue != pred { - redundant = false - break - } - } - - if !nonSelfReferencingValue.Valid() { - // This shouldn't happen, and must be a bug in builder.go. - panic("BUG: params added but only self-referencing") - } - - if redundant { - redundantParams = append(redundantParams, redundantParam{ - index: paramIndex, uniqueValue: nonSelfReferencingValue, - }) - } - } - - if len(redundantParams) == 0 { - continue - } - changed = true - - // Remove the redundant PHIs from the argument list of branching instructions. - for predIndex := range blk.preds { - redundantParamsCur, predParamCur := 0, 0 - predBlk := blk.preds[predIndex] - branchInst := predBlk.branch - view := branchInst.vs.View() - for argIndex, value := range view { - if len(redundantParams) == redundantParamsCur || - redundantParams[redundantParamsCur].index != argIndex { - view[predParamCur] = value - predParamCur++ - } else { - redundantParamsCur++ - } - } - branchInst.vs.Cut(predParamCur) - } - - // Still need to have the definition of the value of the PHI (previously as the parameter). - for i := range redundantParams { - redundantValue := &redundantParams[i] - phiValue := params[redundantValue.index] - // Create an alias in this block from the only phi argument to the phi value. - b.alias(phiValue, redundantValue.uniqueValue) - } - - // Finally, Remove the param from the blk. 
- paramsCur, redundantParamsCur := 0, 0 - for paramIndex := 0; paramIndex < paramNum; paramIndex++ { - param := params[paramIndex] - if len(redundantParams) == redundantParamsCur || redundantParams[redundantParamsCur].index != paramIndex { - params[paramsCur] = param - paramsCur++ - } else { - redundantParamsCur++ - } - } - blk.params.Cut(paramsCur) - - // Clears the map for the next iteration. - redundantParams = redundantParams[:0] - } - - if !changed { - break - } - } - - // Reuse the slice for the future passes. - b.redundantParams = redundantParams -} - -// passDeadCodeEliminationOpt traverses all the instructions, and calculates the reference count of each Value, and -// eliminates all the unnecessary instructions whose ref count is zero. -// The results are stored at builder.valueRefCounts. This also assigns a InstructionGroupID to each Instruction -// during the process. This is the last SSA-level optimization pass and after this, -// the SSA function is ready to be used by backends. -// -// TODO: the algorithm here might not be efficient. Get back to this later. -func passDeadCodeEliminationOpt(b *builder) { - nvid := int(b.nextValueID) - if nvid >= len(b.valuesInfo) { - l := nvid - len(b.valuesInfo) + 1 - b.valuesInfo = append(b.valuesInfo, make([]ValueInfo, l)...) - view := b.valuesInfo[len(b.valuesInfo)-l:] - for i := range view { - view[i].alias = ValueInvalid - } - } - - // First, we gather all the instructions with side effects. - liveInstructions := b.instStack[:0] - // During the process, we will assign InstructionGroupID to each instruction, which is not - // relevant to dead code elimination, but we need in the backend. - var gid InstructionGroupID - for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { - for cur := blk.rootInstr; cur != nil; cur = cur.next { - cur.gid = gid - switch cur.sideEffect() { - case sideEffectTraps: - // The trappable should always be alive. - liveInstructions = append(liveInstructions, cur) - case sideEffectStrict: - liveInstructions = append(liveInstructions, cur) - // The strict side effect should create different instruction groups. - gid++ - } - } - } - - // Find all the instructions referenced by live instructions transitively. - for len(liveInstructions) > 0 { - tail := len(liveInstructions) - 1 - live := liveInstructions[tail] - liveInstructions = liveInstructions[:tail] - if live.live { - // If it's already marked alive, this is referenced multiple times, - // so we can skip it. - continue - } - live.live = true - - // Before we walk, we need to resolve the alias first. - b.resolveArgumentAlias(live) - - v1, v2, v3, vs := live.Args() - if v1.Valid() { - producingInst := b.InstructionOfValue(v1) - if producingInst != nil { - liveInstructions = append(liveInstructions, producingInst) - } - } - - if v2.Valid() { - producingInst := b.InstructionOfValue(v2) - if producingInst != nil { - liveInstructions = append(liveInstructions, producingInst) - } - } - - if v3.Valid() { - producingInst := b.InstructionOfValue(v3) - if producingInst != nil { - liveInstructions = append(liveInstructions, producingInst) - } - } - - for _, v := range vs { - producingInst := b.InstructionOfValue(v) - if producingInst != nil { - liveInstructions = append(liveInstructions, producingInst) - } - } - } - - // Now that all the live instructions are flagged as live=true, we eliminate all dead instructions. 
- for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { - for cur := blk.rootInstr; cur != nil; cur = cur.next { - if !cur.live { - // Remove the instruction from the list. - if prev := cur.prev; prev != nil { - prev.next = cur.next - } else { - blk.rootInstr = cur.next - } - if next := cur.next; next != nil { - next.prev = cur.prev - } - continue - } - - // If the value alive, we can be sure that arguments are used definitely. - // Hence, we can increment the value reference counts. - v1, v2, v3, vs := cur.Args() - if v1.Valid() { - b.incRefCount(v1.ID(), cur) - } - if v2.Valid() { - b.incRefCount(v2.ID(), cur) - } - if v3.Valid() { - b.incRefCount(v3.ID(), cur) - } - for _, v := range vs { - b.incRefCount(v.ID(), cur) - } - } - } - - b.instStack = liveInstructions // we reuse the stack for the next iteration. -} - -func (b *builder) incRefCount(id ValueID, from *Instruction) { - if wazevoapi.SSALoggingEnabled { - fmt.Printf("v%d referenced from %v\n", id, from.Format(b)) - } - info := &b.valuesInfo[id] - info.RefCount++ -} - -// passNopInstElimination eliminates the instructions which is essentially a no-op. -func passNopInstElimination(b *builder) { - for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { - for cur := blk.rootInstr; cur != nil; cur = cur.next { - switch cur.Opcode() { - // TODO: add more logics here. - case OpcodeIshl, OpcodeSshr, OpcodeUshr: - x, amount := cur.Arg2() - definingInst := b.InstructionOfValue(amount) - if definingInst == nil { - // If there's no defining instruction, that means the amount is coming from the parameter. - continue - } - if definingInst.Constant() { - v := definingInst.ConstantVal() - - if x.Type().Bits() == 64 { - v = v % 64 - } else { - v = v % 32 - } - if v == 0 { - b.alias(cur.Return(), x) - } - } - } - } - } -} - -// passSortSuccessors sorts the successors of each block in the natural program order. -func passSortSuccessors(b *builder) { - for i := 0; i < b.basicBlocksPool.Allocated(); i++ { - blk := b.basicBlocksPool.View(i) - sortBlocks(blk.success) - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go deleted file mode 100644 index 0118e8b2e..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go +++ /dev/null @@ -1,334 +0,0 @@ -package ssa - -import ( - "fmt" - "strings" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -// passLayoutBlocks implements Builder.LayoutBlocks. This re-organizes builder.reversePostOrderedBasicBlocks. -// -// TODO: there are tons of room for improvement here. e.g. LLVM has BlockPlacementPass using BlockFrequencyInfo, -// BranchProbabilityInfo, and LoopInfo to do a much better job. Also, if we have the profiling instrumentation -// like ball-larus algorithm, then we could do profile-guided optimization. Basically all of them are trying -// to maximize the fall-through opportunities which is most efficient. -// -// Here, fallthrough happens when a block ends with jump instruction whose target is the right next block in the -// builder.reversePostOrderedBasicBlocks. -// -// Currently, we just place blocks using the DFS reverse post-order of the dominator tree with the heuristics: -// 1. a split edge trampoline towards a loop header will be placed as a fallthrough. -// 2. we invert the brz and brnz if it makes the fallthrough more likely. 
-// -// This heuristic is done in maybeInvertBranches function. -func passLayoutBlocks(b *builder) { - // We might end up splitting critical edges which adds more basic blocks, - // so we store the currently existing basic blocks in nonSplitBlocks temporarily. - // That way we can iterate over the original basic blocks while appending new ones into reversePostOrderedBasicBlocks. - nonSplitBlocks := b.blkStack[:0] - for i, blk := range b.reversePostOrderedBasicBlocks { - if !blk.Valid() { - continue - } - nonSplitBlocks = append(nonSplitBlocks, blk) - if i != len(b.reversePostOrderedBasicBlocks)-1 { - _ = maybeInvertBranches(b, blk, b.reversePostOrderedBasicBlocks[i+1]) - } - } - - var trampolines []*basicBlock - - // Reset the order slice since we update on the fly by splitting critical edges. - b.reversePostOrderedBasicBlocks = b.reversePostOrderedBasicBlocks[:0] - uninsertedTrampolines := b.blkStack2[:0] - for _, blk := range nonSplitBlocks { - for i := range blk.preds { - pred := blk.preds[i].blk - if pred.visited == 1 || !pred.Valid() { - continue - } else if pred.reversePostOrder < blk.reversePostOrder { - // This means the edge is critical, and this pred is the trampoline and yet to be inserted. - // Split edge trampolines must come before the destination in reverse post-order. - b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, pred) - pred.visited = 1 // mark as inserted. - } - } - - // Now that we've already added all the potential trampoline blocks incoming to this block, - // we can add this block itself. - b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, blk) - blk.visited = 1 // mark as inserted. - - if len(blk.success) < 2 { - // There won't be critical edge originating from this block. - continue - } else if blk.currentInstr.opcode == OpcodeBrTable { - // We don't split critical edges here, because at the construction site of BrTable, we already split the edges. - continue - } - - for sidx, succ := range blk.success { - if !succ.ReturnBlock() && // If the successor is a return block, we need to split the edge any way because we need "epilogue" to be inserted. - // Plus if there's no multiple incoming edges to this successor, (pred, succ) is not critical. - len(succ.preds) < 2 { - continue - } - - // Otherwise, we are sure this is a critical edge. To modify the CFG, we need to find the predecessor info - // from the successor. - var predInfo *basicBlockPredecessorInfo - for i := range succ.preds { // This linear search should not be a problem since the number of predecessors should almost always small. - pred := &succ.preds[i] - if pred.blk == blk { - predInfo = pred - break - } - } - - if predInfo == nil { - // This must be a bug in somewhere around branch manipulation. - panic("BUG: predecessor info not found while the successor exists in successors list") - } - - if wazevoapi.SSALoggingEnabled { - fmt.Printf("trying to split edge from %d->%d at %s\n", - blk.ID(), succ.ID(), predInfo.branch.Format(b)) - } - - trampoline := b.splitCriticalEdge(blk, succ, predInfo) - // Update the successors slice because the target is no longer the original `succ`. 
- blk.success[sidx] = trampoline - - if wazevoapi.SSAValidationEnabled { - trampolines = append(trampolines, trampoline) - } - - if wazevoapi.SSALoggingEnabled { - fmt.Printf("edge split from %d->%d at %s as %d->%d->%d \n", - blk.ID(), succ.ID(), predInfo.branch.Format(b), - blk.ID(), trampoline.ID(), succ.ID()) - } - - fallthroughBranch := blk.currentInstr - if fallthroughBranch.opcode == OpcodeJump && BasicBlockID(fallthroughBranch.rValue) == trampoline.id { - // This can be lowered as fallthrough at the end of the block. - b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline) - trampoline.visited = 1 // mark as inserted. - } else { - uninsertedTrampolines = append(uninsertedTrampolines, trampoline) - } - } - - for _, trampoline := range uninsertedTrampolines { - if trampoline.success[0].reversePostOrder <= trampoline.reversePostOrder { // "<=", not "<" because the target might be itself. - // This means the critical edge was backward, so we insert after the current block immediately. - b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline) - trampoline.visited = 1 // mark as inserted. - } // If the target is forward, we can wait to insert until the target is inserted. - } - uninsertedTrampolines = uninsertedTrampolines[:0] // Reuse the stack for the next block. - } - - if wazevoapi.SSALoggingEnabled { - var bs []string - for _, blk := range b.reversePostOrderedBasicBlocks { - bs = append(bs, blk.Name()) - } - fmt.Println("ordered blocks: ", strings.Join(bs, ", ")) - } - - if wazevoapi.SSAValidationEnabled { - for _, trampoline := range trampolines { - if trampoline.visited != 1 { - panic("BUG: trampoline block not inserted: " + trampoline.formatHeader(b)) - } - trampoline.validate(b) - } - } - - // Reuse the stack for the next iteration. - b.blkStack2 = uninsertedTrampolines[:0] -} - -// markFallthroughJumps finds the fallthrough jumps and marks them as such. -func (b *builder) markFallthroughJumps() { - l := len(b.reversePostOrderedBasicBlocks) - 1 - for i, blk := range b.reversePostOrderedBasicBlocks { - if i < l { - cur := blk.currentInstr - if cur.opcode == OpcodeJump && BasicBlockID(cur.rValue) == b.reversePostOrderedBasicBlocks[i+1].id { - cur.AsFallthroughJump() - } - } - } -} - -// maybeInvertBranches inverts the branch instructions if it is likely possible to the fallthrough more likely with simple heuristics. -// nextInRPO is the next block in the reverse post-order. -// -// Returns true if the branch is inverted for testing purpose. -func maybeInvertBranches(b *builder, now *basicBlock, nextInRPO *basicBlock) bool { - fallthroughBranch := now.currentInstr - if fallthroughBranch.opcode == OpcodeBrTable { - return false - } - - condBranch := fallthroughBranch.prev - if condBranch == nil || (condBranch.opcode != OpcodeBrnz && condBranch.opcode != OpcodeBrz) { - return false - } - - if len(fallthroughBranch.vs.View()) != 0 || len(condBranch.vs.View()) != 0 { - // If either one of them has arguments, we don't invert the branches. - return false - } - - // So this block has two branches (a conditional branch followed by an unconditional branch) at the end. - // We can invert the condition of the branch if it makes the fallthrough more likely. 
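Concretely (an editorial illustration with made-up block names, in roughly the notation Instruction.Format prints): if blk currently ends with

    Brz v5, blk_next      (blk_next is the next block in reverse post-order, or a loop header)
    Jump blk_other

then flipping the conditional opcode and swapping the two targets yields

    Brnz v5, blk_other
    Jump blk_next         (can be emitted as a fallthrough once the blocks are laid out)

which is exactly the rewrite performed below.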
- - fallthroughTarget := b.basicBlock(BasicBlockID(fallthroughBranch.rValue)) - condTarget := b.basicBlock(BasicBlockID(condBranch.rValue)) - - if fallthroughTarget.loopHeader { - // First, if the tail's target is loopHeader, we don't need to do anything here, - // because the edge is likely to be critical edge for complex loops (e.g. loop with branches inside it). - // That means, we will split the edge in the end of LayoutBlocks function, and insert the trampoline block - // right after this block, which will be fallthrough in any way. - return false - } else if condTarget.loopHeader { - // On the other hand, if the condBranch's target is loopHeader, we invert the condition of the branch - // so that we could get the fallthrough to the trampoline block. - goto invert - } - - if fallthroughTarget == nextInRPO { - // Also, if the tail's target is the next block in the reverse post-order, we don't need to do anything here, - // because if this is not critical edge, we would end up placing these two blocks adjacent to each other. - // Even if it is the critical edge, we place the trampoline block right after this block, which will be fallthrough in any way. - return false - } else if condTarget == nextInRPO { - // If the condBranch's target is the next block in the reverse post-order, we invert the condition of the branch - // so that we could get the fallthrough to the block. - goto invert - } else { - return false - } - -invert: - for i := range fallthroughTarget.preds { - pred := &fallthroughTarget.preds[i] - if pred.branch == fallthroughBranch { - pred.branch = condBranch - break - } - } - for i := range condTarget.preds { - pred := &condTarget.preds[i] - if pred.branch == condBranch { - pred.branch = fallthroughBranch - break - } - } - - condBranch.InvertBrx() - condBranch.rValue = Value(fallthroughTarget.ID()) - fallthroughBranch.rValue = Value(condTarget.ID()) - if wazevoapi.SSALoggingEnabled { - fmt.Printf("inverting branches at %d->%d and %d->%d\n", - now.ID(), fallthroughTarget.ID(), now.ID(), condTarget.ID()) - } - - return true -} - -// splitCriticalEdge splits the critical edge between the given predecessor (`pred`) and successor (owning `predInfo`). -// -// - `pred` is the source of the critical edge, -// - `succ` is the destination of the critical edge, -// - `predInfo` is the predecessor info in the succ.preds slice which represents the critical edge. -// -// Why splitting critical edges is important? See following links: -// -// - https://en.wikipedia.org/wiki/Control-flow_graph -// - https://nickdesaulniers.github.io/blog/2023/01/27/critical-edge-splitting/ -// -// The returned basic block is the trampoline block which is inserted to split the critical edge. -func (b *builder) splitCriticalEdge(pred, succ *basicBlock, predInfo *basicBlockPredecessorInfo) *basicBlock { - // In the following, we convert the following CFG: - // - // pred --(originalBranch)--> succ - // - // to the following CFG: - // - // pred --(newBranch)--> trampoline --(originalBranch)-> succ - // - // where trampoline is a new basic block which is created to split the critical edge. - - trampoline := b.allocateBasicBlock() - if int(trampoline.id) >= len(b.dominators) { - b.dominators = append(b.dominators, make([]*basicBlock, trampoline.id+1)...) - } - b.dominators[trampoline.id] = pred - - originalBranch := predInfo.branch - - // Replace originalBranch with the newBranch. 
- newBranch := b.AllocateInstruction() - newBranch.opcode = originalBranch.opcode - newBranch.rValue = Value(trampoline.ID()) - switch originalBranch.opcode { - case OpcodeJump: - case OpcodeBrz, OpcodeBrnz: - originalBranch.opcode = OpcodeJump // Trampoline consists of one unconditional branch. - newBranch.v = originalBranch.v - originalBranch.v = ValueInvalid - default: - panic("BUG: critical edge shouldn't be originated from br_table") - } - swapInstruction(pred, originalBranch, newBranch) - - // Replace the original branch with the new branch. - trampoline.rootInstr = originalBranch - trampoline.currentInstr = originalBranch - trampoline.success = append(trampoline.success, succ) // Do not use []*basicBlock{pred} because we might have already allocated the slice. - trampoline.preds = append(trampoline.preds, // same as ^. - basicBlockPredecessorInfo{blk: pred, branch: newBranch}) - b.Seal(trampoline) - - // Update the original branch to point to the trampoline. - predInfo.blk = trampoline - predInfo.branch = originalBranch - - if wazevoapi.SSAValidationEnabled { - trampoline.validate(b) - } - - if len(trampoline.params.View()) > 0 { - panic("trampoline should not have params") - } - - // Assign the same order as the original block so that this will be placed before the actual destination. - trampoline.reversePostOrder = pred.reversePostOrder - return trampoline -} - -// swapInstruction replaces `old` in the block `blk` with `New`. -func swapInstruction(blk *basicBlock, old, New *Instruction) { - if blk.rootInstr == old { - blk.rootInstr = New - next := old.next - New.next = next - next.prev = New - } else { - if blk.currentInstr == old { - blk.currentInstr = New - } - prev := old.prev - prev.next, New.prev = New, prev - if next := old.next; next != nil { - New.next, next.prev = next, New - } - } - old.prev, old.next = nil, nil -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go deleted file mode 100644 index e8288c4bd..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go +++ /dev/null @@ -1,313 +0,0 @@ -package ssa - -import ( - "fmt" - "math" - "strings" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -// passCalculateImmediateDominators calculates immediate dominators for each basic block. -// The result is stored in b.dominators. This make it possible for the following passes to -// use builder.isDominatedBy to check if a block is dominated by another block. -// -// At the last of pass, this function also does the loop detection and sets the basicBlock.loop flag. -func passCalculateImmediateDominators(b *builder) { - reversePostOrder := b.reversePostOrderedBasicBlocks[:0] - - // Store the reverse postorder from the entrypoint into reversePostOrder slice. - // This calculation of reverse postorder is not described in the paper, - // so we use heuristic to calculate it so that we could potentially handle arbitrary - // complex CFGs under the assumption that success is sorted in program's natural order. - // That means blk.success[i] always appears before blk.success[i+1] in the source program, - // which is a reasonable assumption as long as SSA Builder is properly used. - // - // First we push blocks in postorder iteratively visit successors of the entry block. 
- entryBlk := b.entryBlk() - exploreStack := append(b.blkStack[:0], entryBlk) - // These flags are used to track the state of the block in the DFS traversal. - // We temporarily use the reversePostOrder field to store the state. - const visitStateUnseen, visitStateSeen, visitStateDone = 0, 1, 2 - entryBlk.visited = visitStateSeen - for len(exploreStack) > 0 { - tail := len(exploreStack) - 1 - blk := exploreStack[tail] - exploreStack = exploreStack[:tail] - switch blk.visited { - case visitStateUnseen: - // This is likely a bug in the frontend. - panic("BUG: unsupported CFG") - case visitStateSeen: - // This is the first time to pop this block, and we have to see the successors first. - // So push this block again to the stack. - exploreStack = append(exploreStack, blk) - // And push the successors to the stack if necessary. - for _, succ := range blk.success { - if succ.ReturnBlock() || succ.invalid { - continue - } - if succ.visited == visitStateUnseen { - succ.visited = visitStateSeen - exploreStack = append(exploreStack, succ) - } - } - // Finally, we could pop this block once we pop all of its successors. - blk.visited = visitStateDone - case visitStateDone: - // Note: at this point we push blk in postorder despite its name. - reversePostOrder = append(reversePostOrder, blk) - default: - panic("BUG") - } - } - // At this point, reversePostOrder has postorder actually, so we reverse it. - for i := len(reversePostOrder)/2 - 1; i >= 0; i-- { - j := len(reversePostOrder) - 1 - i - reversePostOrder[i], reversePostOrder[j] = reversePostOrder[j], reversePostOrder[i] - } - - for i, blk := range reversePostOrder { - blk.reversePostOrder = int32(i) - } - - // Reuse the dominators slice if possible from the previous computation of function. - b.dominators = b.dominators[:cap(b.dominators)] - if len(b.dominators) < b.basicBlocksPool.Allocated() { - // Generously reserve space in the slice because the slice will be reused future allocation. - b.dominators = append(b.dominators, make([]*basicBlock, b.basicBlocksPool.Allocated())...) - } - calculateDominators(reversePostOrder, b.dominators) - - // Reuse the slices for the future use. - b.blkStack = exploreStack - - // For the following passes. - b.reversePostOrderedBasicBlocks = reversePostOrder - - // Ready to detect loops! - subPassLoopDetection(b) -} - -// calculateDominators calculates the immediate dominator of each node in the CFG, and store the result in `doms`. -// The algorithm is based on the one described in the paper "A Simple, Fast Dominance Algorithm" -// https://www.cs.rice.edu/~keith/EMBED/dom.pdf which is a faster/simple alternative to the well known Lengauer-Tarjan algorithm. -// -// The following code almost matches the pseudocode in the paper with one exception (see the code comment below). -// -// The result slice `doms` must be pre-allocated with the size larger than the size of dfsBlocks. -func calculateDominators(reversePostOrderedBlks []*basicBlock, doms []*basicBlock) { - entry, reversePostOrderedBlks := reversePostOrderedBlks[0], reversePostOrderedBlks[1: /* skips entry point */] - for _, blk := range reversePostOrderedBlks { - doms[blk.id] = nil - } - doms[entry.id] = entry - - changed := true - for changed { - changed = false - for _, blk := range reversePostOrderedBlks { - var u *basicBlock - for i := range blk.preds { - pred := blk.preds[i].blk - // Skip if this pred is not reachable yet. Note that this is not described in the paper, - // but it is necessary to handle nested loops etc. 
- if doms[pred.id] == nil { - continue - } - - if u == nil { - u = pred - continue - } else { - u = intersect(doms, u, pred) - } - } - if doms[blk.id] != u { - doms[blk.id] = u - changed = true - } - } - } -} - -// intersect returns the common dominator of blk1 and blk2. -// -// This is the `intersect` function in the paper. -func intersect(doms []*basicBlock, blk1 *basicBlock, blk2 *basicBlock) *basicBlock { - finger1, finger2 := blk1, blk2 - for finger1 != finger2 { - // Move the 'finger1' upwards to its immediate dominator. - for finger1.reversePostOrder > finger2.reversePostOrder { - finger1 = doms[finger1.id] - } - // Move the 'finger2' upwards to its immediate dominator. - for finger2.reversePostOrder > finger1.reversePostOrder { - finger2 = doms[finger2.id] - } - } - return finger1 -} - -// subPassLoopDetection detects loops in the function using the immediate dominators. -// -// This is run at the last of passCalculateImmediateDominators. -func subPassLoopDetection(b *builder) { - for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { - for i := range blk.preds { - pred := blk.preds[i].blk - if pred.invalid { - continue - } - if b.isDominatedBy(pred, blk) { - blk.loopHeader = true - } - } - } -} - -// buildLoopNestingForest builds the loop nesting forest for the function. -// This must be called after branch splitting since it relies on the CFG. -func passBuildLoopNestingForest(b *builder) { - ent := b.entryBlk() - doms := b.dominators - for _, blk := range b.reversePostOrderedBasicBlocks { - n := doms[blk.id] - for !n.loopHeader && n != ent { - n = doms[n.id] - } - - if n == ent && blk.loopHeader { - b.loopNestingForestRoots = append(b.loopNestingForestRoots, blk) - } else if n == ent { - } else if n.loopHeader { - n.loopNestingForestChildren = n.loopNestingForestChildren.Append(&b.varLengthBasicBlockPool, blk) - } - } - - if wazevoapi.SSALoggingEnabled { - for _, root := range b.loopNestingForestRoots { - printLoopNestingForest(root.(*basicBlock), 0) - } - } -} - -func printLoopNestingForest(root *basicBlock, depth int) { - fmt.Println(strings.Repeat("\t", depth), "loop nesting forest root:", root.ID()) - for _, child := range root.loopNestingForestChildren.View() { - fmt.Println(strings.Repeat("\t", depth+1), "child:", child.ID()) - if child.LoopHeader() { - printLoopNestingForest(child.(*basicBlock), depth+2) - } - } -} - -type dominatorSparseTree struct { - time int32 - euler []*basicBlock - first, depth []int32 - table [][]int32 -} - -// passBuildDominatorTree builds the dominator tree for the function, and constructs builder.sparseTree. -func passBuildDominatorTree(b *builder) { - // First we materialize the children of each node in the dominator tree. - idoms := b.dominators - for _, blk := range b.reversePostOrderedBasicBlocks { - parent := idoms[blk.id] - if parent == nil { - panic("BUG") - } else if parent == blk { - // This is the entry block. - continue - } - if prev := parent.child; prev == nil { - parent.child = blk - } else { - parent.child = blk - blk.sibling = prev - } - } - - // Reset the state from the previous computation. - n := b.basicBlocksPool.Allocated() - st := &b.sparseTree - st.euler = append(st.euler[:0], make([]*basicBlock, 2*n-1)...) - st.first = append(st.first[:0], make([]int32, n)...) - for i := range st.first { - st.first[i] = -1 - } - st.depth = append(st.depth[:0], make([]int32, 2*n-1)...) - st.time = 0 - - // Start building the sparse tree. 
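// Illustrative sketch (not from the wazero source): the fixed-point loop and two-finger
// intersect from "A Simple, Fast Dominance Algorithm", as used by calculateDominators and
// intersect above, on a made-up diamond CFG with plain reverse-postorder indexes.
package main

import "fmt"

func main() {
	preds := [][]int{{}, {0}, {0}, {1, 2}} // 0->1, 0->2, 1->3, 2->3
	idom := []int{0, -1, -1, -1}           // -1 means "not computed yet"; the entry dominates itself

	intersect := func(a, b int) int {
		for a != b {
			for a > b { // a larger index is later in the reverse postorder: walk it up
				a = idom[a]
			}
			for b > a {
				b = idom[b]
			}
		}
		return a
	}

	for changed := true; changed; {
		changed = false
		for n := 1; n < len(preds); n++ { // skip the entry, process in reverse postorder
			u := -1
			for _, p := range preds[n] {
				if idom[p] == -1 {
					continue // unreachable/unprocessed predecessor, as in the pass above
				}
				if u == -1 {
					u = p
				} else {
					u = intersect(u, p)
				}
			}
			if u != idom[n] {
				idom[n], changed = u, true
			}
		}
	}
	fmt.Println(idom) // [0 0 0 0]: the entry is the immediate dominator of every block
}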
- st.eulerTour(b.entryBlk(), 0) - st.buildSparseTable() -} - -func (dt *dominatorSparseTree) eulerTour(node *basicBlock, height int32) { - if wazevoapi.SSALoggingEnabled { - fmt.Println(strings.Repeat("\t", int(height)), "euler tour:", node.ID()) - } - dt.euler[dt.time] = node - dt.depth[dt.time] = height - if dt.first[node.id] == -1 { - dt.first[node.id] = dt.time - } - dt.time++ - - for child := node.child; child != nil; child = child.sibling { - dt.eulerTour(child, height+1) - dt.euler[dt.time] = node // add the current node again after visiting a child - dt.depth[dt.time] = height - dt.time++ - } -} - -// buildSparseTable builds a sparse table for RMQ queries. -func (dt *dominatorSparseTree) buildSparseTable() { - n := len(dt.depth) - k := int(math.Log2(float64(n))) + 1 - table := dt.table - - if n >= len(table) { - table = append(table, make([][]int32, n-len(table)+1)...) - } - for i := range table { - if len(table[i]) < k { - table[i] = append(table[i], make([]int32, k-len(table[i]))...) - } - table[i][0] = int32(i) - } - - for j := 1; 1<<j <= n; j++ { - for i := 0; i+(1<<j)-1 < n; i++ { - if dt.depth[table[i][j-1]] < dt.depth[table[i+(1<<(j-1))][j-1]] { - table[i][j] = table[i][j-1] - } else { - table[i][j] = table[i+(1<<(j-1))][j-1] - } - } - } - dt.table = table -} - -// rmq performs a range minimum query on the sparse table. -func (dt *dominatorSparseTree) rmq(l, r int32) int32 { - table := dt.table - depth := dt.depth - j := int(math.Log2(float64(r - l + 1))) - if depth[table[l][j]] <= depth[table[r-(1<<j)+1][j]] { - return table[l][j] - } - return table[r-(1<<j)+1][j] -} - -// findLCA finds the LCA using the Euler tour and RMQ. -func (dt *dominatorSparseTree) findLCA(u, v BasicBlockID) *basicBlock { - first := dt.first - if first[u] > first[v] { - u, v = v, u - } - return dt.euler[dt.rmq(first[u], first[v])] -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/signature.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/signature.go deleted file mode 100644 index 43483395a..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/signature.go +++ /dev/null @@ -1,49 +0,0 @@ -package ssa - -import ( - "fmt" - "strings" -) - -// Signature is a function prototype. -type Signature struct { - // ID is a unique identifier for this signature used to lookup. - ID SignatureID - // Params and Results are the types of the parameters and results of the function. - Params, Results []Type - - // used is true if this is used by the currently-compiled function. - // Debugging only. - used bool -} - -// String implements fmt.Stringer. -func (s *Signature) String() string { - str := strings.Builder{} - str.WriteString(s.ID.String()) - str.WriteString(": ") - if len(s.Params) > 0 { - for _, typ := range s.Params { - str.WriteString(typ.String()) - } - } else { - str.WriteByte('v') - } - str.WriteByte('_') - if len(s.Results) > 0 { - for _, typ := range s.Results { - str.WriteString(typ.String()) - } - } else { - str.WriteByte('v') - } - return str.String() -} - -// SignatureID is an unique identifier used to lookup. -type SignatureID int - -// String implements fmt.Stringer. 
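// Illustrative sketch (not from the wazero source): the Euler-tour LCA idea behind
// dominatorSparseTree above, on a made-up tree with plain ints. The real code answers the
// range-minimum query with a sparse table in O(1); this sketch scans the range naively.
package main

import "fmt"

func main() {
	children := [][]int{{1, 2}, {3}, {}, {}} // 0 is the root; 1 and 2 are its children; 3 is a child of 1
	n := len(children)
	var euler, depth []int
	first := make([]int, n)
	for i := range first {
		first[i] = -1
	}

	var tour func(node, h int)
	tour = func(node, h int) {
		if first[node] == -1 {
			first[node] = len(euler)
		}
		euler, depth = append(euler, node), append(depth, h)
		for _, c := range children[node] {
			tour(c, h+1)
			euler, depth = append(euler, node), append(depth, h) // revisit the parent after each child
		}
	}
	tour(0, 0)

	lca := func(u, v int) int {
		l, r := first[u], first[v]
		if l > r {
			l, r = r, l
		}
		best := l
		for i := l + 1; i <= r; i++ { // naive RMQ over the depths of the Euler tour
			if depth[i] < depth[best] {
				best = i
			}
		}
		return euler[best]
	}
	fmt.Println(lca(3, 2)) // 0: the root is the lowest common ancestor of nodes 3 and 2
}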
-func (s SignatureID) String() string { - return fmt.Sprintf("sig%d", s) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/ssa.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/ssa.go deleted file mode 100644 index b477e58bd..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/ssa.go +++ /dev/null @@ -1,14 +0,0 @@ -// Package ssa is used to construct SSA function. By nature this is free of Wasm specific thing -// and ISA. -// -// We use the "block argument" variant of SSA: https://en.wikipedia.org/wiki/Static_single-assignment_form#Block_arguments -// which is equivalent to the traditional PHI function based one, but more convenient during optimizations. -// However, in this package's source code comment, we might use PHI whenever it seems necessary in order to be aligned with -// existing literatures, e.g. SSA level optimization algorithms are often described using PHI nodes. -// -// The rationale doc for the choice of "block argument" by MLIR of LLVM is worth a read: -// https://mlir.llvm.org/docs/Rationale/Rationale/#block-arguments-vs-phi-nodes -// -// The algorithm to resolve variable definitions used here is based on the paper -// "Simple and Efficient Construction of Static Single Assignment Form": https://link.springer.com/content/pdf/10.1007/978-3-642-37051-9_6.pdf. -package ssa diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/type.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/type.go deleted file mode 100644 index 73daf4269..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/type.go +++ /dev/null @@ -1,115 +0,0 @@ -package ssa - -type Type byte - -const ( - typeInvalid Type = iota - - // TODO: add 8, 16 bit types when it's needed for optimizations. - - // TypeI32 represents an integer type with 32 bits. - TypeI32 - - // TypeI64 represents an integer type with 64 bits. - TypeI64 - - // TypeF32 represents 32-bit floats in the IEEE 754. - TypeF32 - - // TypeF64 represents 64-bit floats in the IEEE 754. - TypeF64 - - // TypeV128 represents 128-bit SIMD vectors. - TypeV128 - - // -- Do not add new types after this line. ---- - typeEnd -) - -// String implements fmt.Stringer. -func (t Type) String() (ret string) { - switch t { - case typeInvalid: - return "invalid" - case TypeI32: - return "i32" - case TypeI64: - return "i64" - case TypeF32: - return "f32" - case TypeF64: - return "f64" - case TypeV128: - return "v128" - default: - panic(int(t)) - } -} - -// IsInt returns true if the type is an integer type. -func (t Type) IsInt() bool { - return t == TypeI32 || t == TypeI64 -} - -// IsFloat returns true if the type is a floating point type. -func (t Type) IsFloat() bool { - return t == TypeF32 || t == TypeF64 -} - -// Bits returns the number of bits required to represent the type. -func (t Type) Bits() byte { - switch t { - case TypeI32, TypeF32: - return 32 - case TypeI64, TypeF64: - return 64 - case TypeV128: - return 128 - default: - panic(int(t)) - } -} - -// Size returns the number of bytes required to represent the type. -func (t Type) Size() byte { - return t.Bits() / 8 -} - -func (t Type) invalid() bool { - return t == typeInvalid -} - -// VecLane represents a lane in a SIMD vector. -type VecLane byte - -const ( - VecLaneInvalid VecLane = 1 + iota - VecLaneI8x16 - VecLaneI16x8 - VecLaneI32x4 - VecLaneI64x2 - VecLaneF32x4 - VecLaneF64x2 -) - -// String implements fmt.Stringer. 
-func (vl VecLane) String() (ret string) { - switch vl { - case VecLaneInvalid: - return "invalid" - case VecLaneI8x16: - return "i8x16" - case VecLaneI16x8: - return "i16x8" - case VecLaneI32x4: - return "i32x4" - case VecLaneI64x2: - return "i64x2" - case VecLaneF32x4: - return "f32x4" - case VecLaneF64x2: - return "f64x2" - default: - panic(int(vl)) - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/vs.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/vs.go deleted file mode 100644 index d906e7e35..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/vs.go +++ /dev/null @@ -1,114 +0,0 @@ -package ssa - -import ( - "fmt" - "math" - - "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -// Variable is a unique identifier for a source program's variable and will correspond to -// multiple ssa Value(s). -// -// For example, `Local 1` is a Variable in WebAssembly, and Value(s) will be created for it -// whenever it executes `local.set 1`. -// -// Variable is useful to track the SSA Values of a variable in the source program, and -// can be used to find the corresponding latest SSA Value via Builder.FindValue. -// -// Higher 4-bit is used to store Type for this variable. -type Variable uint32 - -// String implements fmt.Stringer. -func (v Variable) String() string { - return fmt.Sprintf("var%d", v&0x0fffffff) -} - -func (v Variable) setType(typ Type) Variable { - if v >= 1<<28 { - panic(fmt.Sprintf("Too large variable: %d", v)) - } - return Variable(typ)<<28 | v -} - -func (v Variable) getType() Type { - return Type(v >> 28) -} - -// Value represents an SSA value with a type information. The relationship with Variable is 1: N (including 0), -// that means there might be multiple Variable(s) for a Value. -// -// 32 to 59-bit is used to store the unique identifier of the Instruction that generates this value if any. -// 60 to 63-bit is used to store Type for this value. -type Value uint64 - -// ValueID is the lower 32bit of Value, which is the pure identifier of Value without type info. -type ValueID uint32 - -const ( - valueIDInvalid ValueID = math.MaxUint32 - ValueInvalid = Value(valueIDInvalid) -) - -// Format creates a debug string for this Value using the data stored in Builder. -func (v Value) Format(b Builder) string { - if annotation, ok := b.(*builder).valueAnnotations[v.ID()]; ok { - return annotation - } - return fmt.Sprintf("v%d", v.ID()) -} - -func (v Value) formatWithType(b Builder) (ret string) { - if annotation, ok := b.(*builder).valueAnnotations[v.ID()]; ok { - ret = annotation + ":" + v.Type().String() - } else { - ret = fmt.Sprintf("v%d:%s", v.ID(), v.Type()) - } - - if wazevoapi.SSALoggingEnabled { // This is useful to check live value analysis bugs. - if bd := b.(*builder); bd.donePostBlockLayoutPasses { - id := v.ID() - ret += fmt.Sprintf("(ref=%d)", bd.valuesInfo[id].RefCount) - } - } - return ret -} - -// Valid returns true if this value is valid. -func (v Value) Valid() bool { - return v.ID() != valueIDInvalid -} - -// Type returns the Type of this value. -func (v Value) Type() Type { - return Type(v >> 60) -} - -// ID returns the valueID of this value. -func (v Value) ID() ValueID { - return ValueID(v) -} - -// setType sets a type to this Value and returns the updated Value. -func (v Value) setType(typ Type) Value { - return v | Value(typ)<<60 -} - -// setInstructionID sets an Instruction.id to this Value and returns the updated Value. 
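// Illustrative sketch (not from the wazero source): the Value bit layout documented above,
// spelled out with plain integers. Bits 0-31 hold the ValueID, bits 32-59 the id of the
// generating Instruction, and bits 60-63 the Type tag. The concrete numbers are made up.
package main

import "fmt"

func main() {
	const (
		valueID uint64 = 42
		instrID uint64 = 7
		typeI64 uint64 = 2 // matches TypeI64 in the Type enum above
	)
	v := valueID | instrID<<32 | typeI64<<60

	fmt.Println(uint32(v))              // 42: the low 32 bits are the ValueID
	fmt.Println((v >> 32) & 0x0fffffff) // 7: the instruction id occupies 28 bits
	fmt.Println(v >> 60)                // 2: the type tag
}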
-func (v Value) setInstructionID(id int) Value { - if id < 0 || uint(id) >= 1<<28 { - panic(fmt.Sprintf("Too large instruction ID: %d", id)) - } - return v | Value(id)<<32 -} - -// instructionID() returns the Instruction.id of this Value. -func (v Value) instructionID() int { - return int(v>>32) & 0x0fffffff -} - -// Values is a slice of Value. Use this instead of []Value to reuse the underlying memory. -type Values = wazevoapi.VarLength[Value] - -// ValuesNil is a nil Values. -var ValuesNil = wazevoapi.NewNilVarLength[Value]() diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/debug_options.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/debug_options.go deleted file mode 100644 index 2db61e219..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/debug_options.go +++ /dev/null @@ -1,196 +0,0 @@ -package wazevoapi - -import ( - "context" - "encoding/hex" - "fmt" - "math/rand" - "os" - "time" -) - -// These consts are used various places in the wazevo implementations. -// Instead of defining them in each file, we define them here so that we can quickly iterate on -// debugging without spending "where do we have debug logging?" time. - -// ----- Debug logging ----- -// These consts must be disabled by default. Enable them only when debugging. - -const ( - FrontEndLoggingEnabled = false - SSALoggingEnabled = false - RegAllocLoggingEnabled = false -) - -// ----- Output prints ----- -// These consts must be disabled by default. Enable them only when debugging. - -const ( - PrintSSA = false - PrintOptimizedSSA = false - PrintSSAToBackendIRLowering = false - PrintRegisterAllocated = false - PrintFinalizedMachineCode = false - PrintMachineCodeHexPerFunction = printMachineCodeHexPerFunctionUnmodified || PrintMachineCodeHexPerFunctionDisassemblable //nolint - printMachineCodeHexPerFunctionUnmodified = false - // PrintMachineCodeHexPerFunctionDisassemblable prints the machine code while modifying the actual result - // to make it disassemblable. This is useful when debugging the final machine code. See the places where this is used for detail. - // When this is enabled, functions must not be called. - PrintMachineCodeHexPerFunctionDisassemblable = false -) - -// printTarget is the function index to print the machine code. This is used for debugging to print the machine code -// of a specific function. -const printTarget = -1 - -// PrintEnabledIndex returns true if the current function index is the print target. -func PrintEnabledIndex(ctx context.Context) bool { - if printTarget == -1 { - return true - } - return GetCurrentFunctionIndex(ctx) == printTarget -} - -// ----- Validations ----- -const ( - // SSAValidationEnabled enables the SSA validation. This is disabled by default since the operation is expensive. - SSAValidationEnabled = false -) - -// ----- Stack Guard Check ----- -const ( - // StackGuardCheckEnabled enables the stack guard check to ensure that our stack bounds check works correctly. - StackGuardCheckEnabled = false - StackGuardCheckGuardPageSize = 8096 -) - -// CheckStackGuardPage checks the given stack guard page is not corrupted. 
-func CheckStackGuardPage(s []byte) { - for i := 0; i < StackGuardCheckGuardPageSize; i++ { - if s[i] != 0 { - panic( - fmt.Sprintf("BUG: stack guard page is corrupted:\n\tguard_page=%s\n\tstack=%s", - hex.EncodeToString(s[:StackGuardCheckGuardPageSize]), - hex.EncodeToString(s[StackGuardCheckGuardPageSize:]), - )) - } - } -} - -// ----- Deterministic compilation verifier ----- - -const ( - // DeterministicCompilationVerifierEnabled enables the deterministic compilation verifier. This is disabled by default - // since the operation is expensive. But when in doubt, enable this to make sure the compilation is deterministic. - DeterministicCompilationVerifierEnabled = false - DeterministicCompilationVerifyingIter = 5 -) - -type ( - verifierState struct { - initialCompilationDone bool - maybeRandomizedIndexes []int - r *rand.Rand - values map[string]string - } - verifierStateContextKey struct{} - currentFunctionNameKey struct{} - currentFunctionIndexKey struct{} -) - -// NewDeterministicCompilationVerifierContext creates a new context with the deterministic compilation verifier used per wasm.Module. -func NewDeterministicCompilationVerifierContext(ctx context.Context, localFunctions int) context.Context { - maybeRandomizedIndexes := make([]int, localFunctions) - for i := range maybeRandomizedIndexes { - maybeRandomizedIndexes[i] = i - } - r := rand.New(rand.NewSource(time.Now().UnixNano())) - return context.WithValue(ctx, verifierStateContextKey{}, &verifierState{ - r: r, maybeRandomizedIndexes: maybeRandomizedIndexes, values: map[string]string{}, - }) -} - -// DeterministicCompilationVerifierRandomizeIndexes randomizes the indexes for the deterministic compilation verifier. -// To get the randomized index, use DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex. -func DeterministicCompilationVerifierRandomizeIndexes(ctx context.Context) { - state := ctx.Value(verifierStateContextKey{}).(*verifierState) - if !state.initialCompilationDone { - // If this is the first attempt, we use the index as-is order. - state.initialCompilationDone = true - return - } - r := state.r - r.Shuffle(len(state.maybeRandomizedIndexes), func(i, j int) { - state.maybeRandomizedIndexes[i], state.maybeRandomizedIndexes[j] = state.maybeRandomizedIndexes[j], state.maybeRandomizedIndexes[i] - }) -} - -// DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex returns the randomized index for the given `index` -// which is assigned by DeterministicCompilationVerifierRandomizeIndexes. -func DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex(ctx context.Context, index int) int { - state := ctx.Value(verifierStateContextKey{}).(*verifierState) - ret := state.maybeRandomizedIndexes[index] - return ret -} - -// VerifyOrSetDeterministicCompilationContextValue verifies that the `newValue` is the same as the previous value for the given `scope` -// and the current function name. If the previous value doesn't exist, it sets the value to the given `newValue`. -// -// If the verification fails, this prints the diff and exits the process. 
-func VerifyOrSetDeterministicCompilationContextValue(ctx context.Context, scope string, newValue string) { - fn := ctx.Value(currentFunctionNameKey{}).(string) - key := fn + ": " + scope - verifierCtx := ctx.Value(verifierStateContextKey{}).(*verifierState) - oldValue, ok := verifierCtx.values[key] - if !ok { - verifierCtx.values[key] = newValue - return - } - if oldValue != newValue { - fmt.Printf( - `BUG: Deterministic compilation failed for function%s at scope="%s". - -This is mostly due to (but might not be limited to): - * Resetting ssa.Builder, backend.Compiler or frontend.Compiler, etc doens't work as expected, and the compilation has been affected by the previous iterations. - * Using a map with non-deterministic iteration order. - ----------- [old] ---------- -%s - ----------- [new] ---------- -%s -`, - fn, scope, oldValue, newValue, - ) - os.Exit(1) - } -} - -// nolint -const NeedFunctionNameInContext = PrintSSA || - PrintOptimizedSSA || - PrintSSAToBackendIRLowering || - PrintRegisterAllocated || - PrintFinalizedMachineCode || - PrintMachineCodeHexPerFunction || - DeterministicCompilationVerifierEnabled || - PerfMapEnabled - -// SetCurrentFunctionName sets the current function name to the given `functionName`. -func SetCurrentFunctionName(ctx context.Context, index int, functionName string) context.Context { - ctx = context.WithValue(ctx, currentFunctionNameKey{}, functionName) - ctx = context.WithValue(ctx, currentFunctionIndexKey{}, index) - return ctx -} - -// GetCurrentFunctionName returns the current function name. -func GetCurrentFunctionName(ctx context.Context) string { - ret, _ := ctx.Value(currentFunctionNameKey{}).(string) - return ret -} - -// GetCurrentFunctionIndex returns the current function index. -func GetCurrentFunctionIndex(ctx context.Context) int { - ret, _ := ctx.Value(currentFunctionIndexKey{}).(int) - return ret -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/exitcode.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/exitcode.go deleted file mode 100644 index 5ad594982..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/exitcode.go +++ /dev/null @@ -1,109 +0,0 @@ -package wazevoapi - -// ExitCode is an exit code of an execution of a function. -type ExitCode uint32 - -const ( - ExitCodeOK ExitCode = iota - ExitCodeGrowStack - ExitCodeGrowMemory - ExitCodeUnreachable - ExitCodeMemoryOutOfBounds - // ExitCodeCallGoModuleFunction is an exit code for a call to an api.GoModuleFunction. - ExitCodeCallGoModuleFunction - // ExitCodeCallGoFunction is an exit code for a call to an api.GoFunction. - ExitCodeCallGoFunction - ExitCodeTableOutOfBounds - ExitCodeIndirectCallNullPointer - ExitCodeIndirectCallTypeMismatch - ExitCodeIntegerDivisionByZero - ExitCodeIntegerOverflow - ExitCodeInvalidConversionToInteger - ExitCodeCheckModuleExitCode - ExitCodeCallListenerBefore - ExitCodeCallListenerAfter - ExitCodeCallGoModuleFunctionWithListener - ExitCodeCallGoFunctionWithListener - ExitCodeTableGrow - ExitCodeRefFunc - ExitCodeMemoryWait32 - ExitCodeMemoryWait64 - ExitCodeMemoryNotify - ExitCodeUnalignedAtomic - exitCodeMax -) - -const ExitCodeMask = 0xff - -// String implements fmt.Stringer. 
-func (e ExitCode) String() string { - switch e { - case ExitCodeOK: - return "ok" - case ExitCodeGrowStack: - return "grow_stack" - case ExitCodeCallGoModuleFunction: - return "call_go_module_function" - case ExitCodeCallGoFunction: - return "call_go_function" - case ExitCodeUnreachable: - return "unreachable" - case ExitCodeMemoryOutOfBounds: - return "memory_out_of_bounds" - case ExitCodeUnalignedAtomic: - return "unaligned_atomic" - case ExitCodeTableOutOfBounds: - return "table_out_of_bounds" - case ExitCodeIndirectCallNullPointer: - return "indirect_call_null_pointer" - case ExitCodeIndirectCallTypeMismatch: - return "indirect_call_type_mismatch" - case ExitCodeIntegerDivisionByZero: - return "integer_division_by_zero" - case ExitCodeIntegerOverflow: - return "integer_overflow" - case ExitCodeInvalidConversionToInteger: - return "invalid_conversion_to_integer" - case ExitCodeCheckModuleExitCode: - return "check_module_exit_code" - case ExitCodeCallListenerBefore: - return "call_listener_before" - case ExitCodeCallListenerAfter: - return "call_listener_after" - case ExitCodeCallGoModuleFunctionWithListener: - return "call_go_module_function_with_listener" - case ExitCodeCallGoFunctionWithListener: - return "call_go_function_with_listener" - case ExitCodeGrowMemory: - return "grow_memory" - case ExitCodeTableGrow: - return "table_grow" - case ExitCodeRefFunc: - return "ref_func" - case ExitCodeMemoryWait32: - return "memory_wait32" - case ExitCodeMemoryWait64: - return "memory_wait64" - case ExitCodeMemoryNotify: - return "memory_notify" - } - panic("TODO") -} - -func ExitCodeCallGoModuleFunctionWithIndex(index int, withListener bool) ExitCode { - if withListener { - return ExitCodeCallGoModuleFunctionWithListener | ExitCode(index<<8) - } - return ExitCodeCallGoModuleFunction | ExitCode(index<<8) -} - -func ExitCodeCallGoFunctionWithIndex(index int, withListener bool) ExitCode { - if withListener { - return ExitCodeCallGoFunctionWithListener | ExitCode(index<<8) - } - return ExitCodeCallGoFunction | ExitCode(index<<8) -} - -func GoFunctionIndexFromExitCode(exitCode ExitCode) int { - return int(exitCode >> 8) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/offsetdata.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/offsetdata.go deleted file mode 100644 index fe6161b04..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/offsetdata.go +++ /dev/null @@ -1,216 +0,0 @@ -package wazevoapi - -import ( - "github.com/tetratelabs/wazero/internal/wasm" -) - -const ( - // FunctionInstanceSize is the size of wazevo.functionInstance. 
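// Illustrative sketch (not from the wazero source): how the packed exit codes built by
// ExitCodeCallGoFunctionWithIndex above round-trip. The low byte (ExitCodeMask) carries
// the exit code itself and the remaining bits carry the index of the Go function.
package main

import "fmt"

func main() {
	const (
		exitCodeCallGoFunction uint32 = 6 // the value ExitCodeCallGoFunction takes in the enum above
		exitCodeMask           uint32 = 0xff
	)
	index := uint32(3)
	packed := exitCodeCallGoFunction | index<<8

	fmt.Println(packed & exitCodeMask) // 6: the plain exit code
	fmt.Println(packed >> 8)           // 3: the Go function index, as GoFunctionIndexFromExitCode returns
}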
- FunctionInstanceSize = 24 - // FunctionInstanceExecutableOffset is an offset of `executable` field in wazevo.functionInstance - FunctionInstanceExecutableOffset = 0 - // FunctionInstanceModuleContextOpaquePtrOffset is an offset of `moduleContextOpaquePtr` field in wazevo.functionInstance - FunctionInstanceModuleContextOpaquePtrOffset = 8 - // FunctionInstanceTypeIDOffset is an offset of `typeID` field in wazevo.functionInstance - FunctionInstanceTypeIDOffset = 16 -) - -const ( - // ExecutionContextOffsetExitCodeOffset is an offset of `exitCode` field in wazevo.executionContext - ExecutionContextOffsetExitCodeOffset Offset = 0 - // ExecutionContextOffsetCallerModuleContextPtr is an offset of `callerModuleContextPtr` field in wazevo.executionContext - ExecutionContextOffsetCallerModuleContextPtr Offset = 8 - // ExecutionContextOffsetOriginalFramePointer is an offset of `originalFramePointer` field in wazevo.executionContext - ExecutionContextOffsetOriginalFramePointer Offset = 16 - // ExecutionContextOffsetOriginalStackPointer is an offset of `originalStackPointer` field in wazevo.executionContext - ExecutionContextOffsetOriginalStackPointer Offset = 24 - // ExecutionContextOffsetGoReturnAddress is an offset of `goReturnAddress` field in wazevo.executionContext - ExecutionContextOffsetGoReturnAddress Offset = 32 - // ExecutionContextOffsetStackBottomPtr is an offset of `stackBottomPtr` field in wazevo.executionContext - ExecutionContextOffsetStackBottomPtr Offset = 40 - // ExecutionContextOffsetGoCallReturnAddress is an offset of `goCallReturnAddress` field in wazevo.executionContext - ExecutionContextOffsetGoCallReturnAddress Offset = 48 - // ExecutionContextOffsetStackPointerBeforeGoCall is an offset of `StackPointerBeforeGoCall` field in wazevo.executionContext - ExecutionContextOffsetStackPointerBeforeGoCall Offset = 56 - // ExecutionContextOffsetStackGrowRequiredSize is an offset of `stackGrowRequiredSize` field in wazevo.executionContext - ExecutionContextOffsetStackGrowRequiredSize Offset = 64 - // ExecutionContextOffsetMemoryGrowTrampolineAddress is an offset of `memoryGrowTrampolineAddress` field in wazevo.executionContext - ExecutionContextOffsetMemoryGrowTrampolineAddress Offset = 72 - // ExecutionContextOffsetStackGrowCallTrampolineAddress is an offset of `stackGrowCallTrampolineAddress` field in wazevo.executionContext. - ExecutionContextOffsetStackGrowCallTrampolineAddress Offset = 80 - // ExecutionContextOffsetCheckModuleExitCodeTrampolineAddress is an offset of `checkModuleExitCodeTrampolineAddress` field in wazevo.executionContext. 
- ExecutionContextOffsetCheckModuleExitCodeTrampolineAddress Offset = 88 - // ExecutionContextOffsetSavedRegistersBegin is an offset of the first element of `savedRegisters` field in wazevo.executionContext - ExecutionContextOffsetSavedRegistersBegin Offset = 96 - // ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque is an offset of `goFunctionCallCalleeModuleContextOpaque` field in wazevo.executionContext - ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque Offset = 1120 - // ExecutionContextOffsetTableGrowTrampolineAddress is an offset of `tableGrowTrampolineAddress` field in wazevo.executionContext - ExecutionContextOffsetTableGrowTrampolineAddress Offset = 1128 - // ExecutionContextOffsetRefFuncTrampolineAddress is an offset of `refFuncTrampolineAddress` field in wazevo.executionContext - ExecutionContextOffsetRefFuncTrampolineAddress Offset = 1136 - ExecutionContextOffsetMemmoveAddress Offset = 1144 - ExecutionContextOffsetFramePointerBeforeGoCall Offset = 1152 - ExecutionContextOffsetMemoryWait32TrampolineAddress Offset = 1160 - ExecutionContextOffsetMemoryWait64TrampolineAddress Offset = 1168 - ExecutionContextOffsetMemoryNotifyTrampolineAddress Offset = 1176 -) - -// ModuleContextOffsetData allows the compilers to get the information about offsets to the fields of wazevo.moduleContextOpaque, -// This is unique per module. -type ModuleContextOffsetData struct { - TotalSize int - ModuleInstanceOffset, - LocalMemoryBegin, - ImportedMemoryBegin, - ImportedFunctionsBegin, - GlobalsBegin, - TypeIDs1stElement, - TablesBegin, - BeforeListenerTrampolines1stElement, - AfterListenerTrampolines1stElement, - DataInstances1stElement, - ElementInstances1stElement Offset -} - -// ImportedFunctionOffset returns an offset of the i-th imported function. -// Each item is stored as wazevo.functionInstance whose size matches FunctionInstanceSize. -func (m *ModuleContextOffsetData) ImportedFunctionOffset(i wasm.Index) ( - executableOffset, moduleCtxOffset, typeIDOffset Offset, -) { - base := m.ImportedFunctionsBegin + Offset(i)*FunctionInstanceSize - return base, base + 8, base + 16 -} - -// GlobalInstanceOffset returns an offset of the i-th global instance. -func (m *ModuleContextOffsetData) GlobalInstanceOffset(i wasm.Index) Offset { - return m.GlobalsBegin + Offset(i)*16 -} - -// Offset represents an offset of a field of a struct. -type Offset int32 - -// U32 encodes an Offset as uint32 for convenience. -func (o Offset) U32() uint32 { - return uint32(o) -} - -// I64 encodes an Offset as int64 for convenience. -func (o Offset) I64() int64 { - return int64(o) -} - -// U64 encodes an Offset as int64 for convenience. -func (o Offset) U64() uint64 { - return uint64(o) -} - -// LocalMemoryBase returns an offset of the first byte of the local memory. -func (m *ModuleContextOffsetData) LocalMemoryBase() Offset { - return m.LocalMemoryBegin -} - -// LocalMemoryLen returns an offset of the length of the local memory buffer. -func (m *ModuleContextOffsetData) LocalMemoryLen() Offset { - if l := m.LocalMemoryBegin; l >= 0 { - return l + 8 - } - return -1 -} - -// TableOffset returns an offset of the i-th table instance. -func (m *ModuleContextOffsetData) TableOffset(tableIndex int) Offset { - return m.TablesBegin + Offset(tableIndex)*8 -} - -// NewModuleContextOffsetData creates a ModuleContextOffsetData determining the structure of moduleContextOpaque for the given Module. -// The structure is described in the comment of wazevo.moduleContextOpaque. 
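// Illustrative sketch (not from the wazero source): the arithmetic behind
// ImportedFunctionOffset above. Each imported function occupies FunctionInstanceSize (24)
// bytes laid out as executable pointer (+0), module context pointer (+8) and type ID (+16).
// The importedFunctionsBegin value here is made up.
package main

import "fmt"

func main() {
	const (
		functionInstanceSize   = 24
		importedFunctionsBegin = 32 // hypothetical start of the imported-functions area
	)
	i := 2 // the third imported function
	base := importedFunctionsBegin + i*functionInstanceSize

	fmt.Println(base, base+8, base+16) // 80 88 96: executable, module context and type ID offsets
}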
-func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContextOffsetData { - ret := ModuleContextOffsetData{} - var offset Offset - - ret.ModuleInstanceOffset = 0 - offset += 8 - - if m.MemorySection != nil { - ret.LocalMemoryBegin = offset - // buffer base + memory size. - const localMemorySizeInOpaqueModuleContext = 16 - offset += localMemorySizeInOpaqueModuleContext - } else { - // Indicates that there's no local memory - ret.LocalMemoryBegin = -1 - } - - if m.ImportMemoryCount > 0 { - offset = align8(offset) - // *wasm.MemoryInstance + imported memory's owner (moduleContextOpaque) - const importedMemorySizeInOpaqueModuleContext = 16 - ret.ImportedMemoryBegin = offset - offset += importedMemorySizeInOpaqueModuleContext - } else { - // Indicates that there's no imported memory - ret.ImportedMemoryBegin = -1 - } - - if m.ImportFunctionCount > 0 { - offset = align8(offset) - ret.ImportedFunctionsBegin = offset - // Each function is stored wazevo.functionInstance. - size := int(m.ImportFunctionCount) * FunctionInstanceSize - offset += Offset(size) - } else { - ret.ImportedFunctionsBegin = -1 - } - - if globals := int(m.ImportGlobalCount) + len(m.GlobalSection); globals > 0 { - // Align to 16 bytes for globals, as f32/f64/v128 might be loaded via SIMD instructions. - offset = align16(offset) - ret.GlobalsBegin = offset - // Pointers to *wasm.GlobalInstance. - offset += Offset(globals) * 16 - } else { - ret.GlobalsBegin = -1 - } - - if tables := len(m.TableSection) + int(m.ImportTableCount); tables > 0 { - offset = align8(offset) - ret.TypeIDs1stElement = offset - offset += 8 // First element of TypeIDs. - - ret.TablesBegin = offset - // Pointers to *wasm.TableInstance. - offset += Offset(tables) * 8 - } else { - ret.TypeIDs1stElement = -1 - ret.TablesBegin = -1 - } - - if withListener { - offset = align8(offset) - ret.BeforeListenerTrampolines1stElement = offset - offset += 8 // First element of BeforeListenerTrampolines. - - ret.AfterListenerTrampolines1stElement = offset - offset += 8 // First element of AfterListenerTrampolines. - } else { - ret.BeforeListenerTrampolines1stElement = -1 - ret.AfterListenerTrampolines1stElement = -1 - } - - ret.DataInstances1stElement = offset - offset += 8 // First element of DataInstances. - - ret.ElementInstances1stElement = offset - offset += 8 // First element of ElementInstances. - - ret.TotalSize = int(align16(offset)) - return ret -} - -func align16(o Offset) Offset { - return (o + 15) &^ 15 -} - -func align8(o Offset) Offset { - return (o + 7) &^ 7 -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap.go deleted file mode 100644 index 642c7f75d..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap.go +++ /dev/null @@ -1,96 +0,0 @@ -package wazevoapi - -import ( - "fmt" - "os" - "strconv" - "sync" -) - -var PerfMap *Perfmap - -func init() { - if PerfMapEnabled { - pid := os.Getpid() - filename := "/tmp/perf-" + strconv.Itoa(pid) + ".map" - - fh, err := os.OpenFile(filename, os.O_APPEND|os.O_RDWR|os.O_CREATE, 0o644) - if err != nil { - panic(err) - } - - PerfMap = &Perfmap{fh: fh} - } -} - -// Perfmap holds perfmap entries to be flushed into a perfmap file. 
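// Illustrative sketch (not from the wazero source): the rounding trick used by align8 and
// align16 above. For a power-of-two n, (o + n-1) &^ (n-1) rounds o up to the next multiple
// of n, because &^ clears the low bits after the bump.
package main

import "fmt"

func align8(o int32) int32  { return (o + 7) &^ 7 }
func align16(o int32) int32 { return (o + 15) &^ 15 }

func main() {
	fmt.Println(align8(13), align8(16))   // 16 16
	fmt.Println(align16(13), align16(33)) // 16 48
}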
-type Perfmap struct { - entries []entry - mux sync.Mutex - fh *os.File -} - -type entry struct { - index int - offset int64 - size uint64 - name string -} - -func (f *Perfmap) Lock() { - f.mux.Lock() -} - -func (f *Perfmap) Unlock() { - f.mux.Unlock() -} - -// AddModuleEntry adds a perfmap entry into the perfmap file. -// index is the index of the function in the module, offset is the offset of the function in the module, -// size is the size of the function, and name is the name of the function. -// -// Note that the entries are not flushed into the perfmap file until Flush is called, -// and the entries are module-scoped; Perfmap must be locked until Flush is called. -func (f *Perfmap) AddModuleEntry(index int, offset int64, size uint64, name string) { - e := entry{index: index, offset: offset, size: size, name: name} - if f.entries == nil { - f.entries = []entry{e} - return - } - f.entries = append(f.entries, e) -} - -// Flush writes the perfmap entries into the perfmap file where the entries are adjusted by the given `addr` and `functionOffsets`. -func (f *Perfmap) Flush(addr uintptr, functionOffsets []int) { - defer func() { - _ = f.fh.Sync() - }() - - for _, e := range f.entries { - if _, err := f.fh.WriteString(fmt.Sprintf("%x %s %s\n", - uintptr(e.offset)+addr+uintptr(functionOffsets[e.index]), - strconv.FormatUint(e.size, 16), - e.name, - )); err != nil { - panic(err) - } - } - f.entries = f.entries[:0] -} - -// Clear clears the perfmap entries not yet flushed. -func (f *Perfmap) Clear() { - f.entries = f.entries[:0] -} - -// AddEntry writes a perfmap entry directly into the perfmap file, not using the entries. -func (f *Perfmap) AddEntry(addr uintptr, size uint64, name string) { - _, err := f.fh.WriteString(fmt.Sprintf("%x %s %s\n", - addr, - strconv.FormatUint(size, 16), - name, - )) - if err != nil { - panic(err) - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap_disabled.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap_disabled.go deleted file mode 100644 index bcc4e545c..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap_disabled.go +++ /dev/null @@ -1,5 +0,0 @@ -//go:build !perfmap - -package wazevoapi - -const PerfMapEnabled = false diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap_enabled.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap_enabled.go deleted file mode 100644 index 2a39879ec..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap_enabled.go +++ /dev/null @@ -1,5 +0,0 @@ -//go:build perfmap - -package wazevoapi - -const PerfMapEnabled = true diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go deleted file mode 100644 index 313e34f9a..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go +++ /dev/null @@ -1,215 +0,0 @@ -package wazevoapi - -const poolPageSize = 128 - -// Pool is a pool of T that can be allocated and reset. -// This is useful to avoid unnecessary allocations. -type Pool[T any] struct { - pages []*[poolPageSize]T - resetFn func(*T) - allocated, index int -} - -// NewPool returns a new Pool. -// resetFn is called when a new T is allocated in Pool.Allocate. 
-func NewPool[T any](resetFn func(*T)) Pool[T] { - var ret Pool[T] - ret.resetFn = resetFn - ret.Reset() - return ret -} - -// Allocated returns the number of allocated T currently in the pool. -func (p *Pool[T]) Allocated() int { - return p.allocated -} - -// Allocate allocates a new T from the pool. -func (p *Pool[T]) Allocate() *T { - if p.index == poolPageSize { - if len(p.pages) == cap(p.pages) { - p.pages = append(p.pages, new([poolPageSize]T)) - } else { - i := len(p.pages) - p.pages = p.pages[:i+1] - if p.pages[i] == nil { - p.pages[i] = new([poolPageSize]T) - } - } - p.index = 0 - } - ret := &p.pages[len(p.pages)-1][p.index] - if p.resetFn != nil { - p.resetFn(ret) - } - p.index++ - p.allocated++ - return ret -} - -// View returns the pointer to i-th item from the pool. -func (p *Pool[T]) View(i int) *T { - page, index := i/poolPageSize, i%poolPageSize - return &p.pages[page][index] -} - -// Reset resets the pool. -func (p *Pool[T]) Reset() { - p.pages = p.pages[:0] - p.index = poolPageSize - p.allocated = 0 -} - -// IDedPool is a pool of T that can be allocated and reset, with a way to get T by an ID. -type IDedPool[T any] struct { - pool Pool[T] - idToItems []*T - maxIDEncountered int -} - -// NewIDedPool returns a new IDedPool. -func NewIDedPool[T any](resetFn func(*T)) IDedPool[T] { - return IDedPool[T]{pool: NewPool[T](resetFn), maxIDEncountered: -1} -} - -// GetOrAllocate returns the T with the given id. -func (p *IDedPool[T]) GetOrAllocate(id int) *T { - if p.maxIDEncountered < id { - p.maxIDEncountered = id - } - if id >= len(p.idToItems) { - p.idToItems = append(p.idToItems, make([]*T, id-len(p.idToItems)+1)...) - } - if p.idToItems[id] == nil { - p.idToItems[id] = p.pool.Allocate() - } - return p.idToItems[id] -} - -// Get returns the T with the given id, or nil if it's not allocated. -func (p *IDedPool[T]) Get(id int) *T { - if id >= len(p.idToItems) { - return nil - } - return p.idToItems[id] -} - -// Reset resets the pool. -func (p *IDedPool[T]) Reset() { - p.pool.Reset() - for i := 0; i <= p.maxIDEncountered; i++ { - p.idToItems[i] = nil - } - p.maxIDEncountered = -1 -} - -// MaxIDEncountered returns the maximum id encountered so far. -func (p *IDedPool[T]) MaxIDEncountered() int { - return p.maxIDEncountered -} - -// arraySize is the size of the array used in VarLengthPool's arrayPool. -// This is chosen to be 8, which is empirically a good number among 8, 12, 16 and 20. -const arraySize = 8 - -// VarLengthPool is a pool of VarLength[T] that can be allocated and reset. -type ( - VarLengthPool[T any] struct { - arrayPool Pool[varLengthPoolArray[T]] - slicePool Pool[[]T] - } - // varLengthPoolArray wraps an array and keeps track of the next index to be used to avoid the heap allocation. - varLengthPoolArray[T any] struct { - arr [arraySize]T - next int - } -) - -// VarLength is a variable length array that can be reused via a pool. -type VarLength[T any] struct { - arr *varLengthPoolArray[T] - slc *[]T -} - -// NewVarLengthPool returns a new VarLengthPool. -func NewVarLengthPool[T any]() VarLengthPool[T] { - return VarLengthPool[T]{ - arrayPool: NewPool[varLengthPoolArray[T]](func(v *varLengthPoolArray[T]) { - v.next = 0 - }), - slicePool: NewPool[[]T](func(i *[]T) { - *i = (*i)[:0] - }), - } -} - -// NewNilVarLength returns a new VarLength[T] with a nil backing. -func NewNilVarLength[T any]() VarLength[T] { - return VarLength[T]{} -} - -// Allocate allocates a new VarLength[T] from the pool. 
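// Illustrative sketch (not from the wazero source): a hypothetical use of the page-based
// Pool above, written as if it lived inside package wazevoapi since the type is internal.
// The resetFn means each Allocate hands back a reset *T without a fresh heap allocation
// once the backing pages exist. The node type and values are made up.
package wazevoapi

import "fmt"

type node struct{ children []int }

func examplePoolUsage() {
	pool := NewPool[node](func(n *node) { n.children = n.children[:0] })

	a := pool.Allocate() // resetFn runs first, so children starts empty
	a.children = append(a.children, 1, 2)

	fmt.Println(pool.Allocated()) // 1
	pool.Reset()                  // all items become reusable; the allocated pages are kept
}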
-func (p *VarLengthPool[T]) Allocate(knownMin int) VarLength[T] { - if knownMin <= arraySize { - arr := p.arrayPool.Allocate() - return VarLength[T]{arr: arr} - } - slc := p.slicePool.Allocate() - return VarLength[T]{slc: slc} -} - -// Reset resets the pool. -func (p *VarLengthPool[T]) Reset() { - p.arrayPool.Reset() - p.slicePool.Reset() -} - -// Append appends items to the backing slice just like the `append` builtin function in Go. -func (i VarLength[T]) Append(p *VarLengthPool[T], items ...T) VarLength[T] { - if i.slc != nil { - *i.slc = append(*i.slc, items...) - return i - } - - if i.arr == nil { - i.arr = p.arrayPool.Allocate() - } - - arr := i.arr - if arr.next+len(items) <= arraySize { - for _, item := range items { - arr.arr[arr.next] = item - arr.next++ - } - } else { - slc := p.slicePool.Allocate() - // Copy the array to the slice. - for ptr := 0; ptr < arr.next; ptr++ { - *slc = append(*slc, arr.arr[ptr]) - } - i.slc = slc - *i.slc = append(*i.slc, items...) - } - return i -} - -// View returns the backing slice. -func (i VarLength[T]) View() []T { - if i.slc != nil { - return *i.slc - } else if i.arr != nil { - arr := i.arr - return arr.arr[:arr.next] - } - return nil -} - -// Cut cuts the backing slice to the given length. -// Precondition: n <= len(i.backing). -func (i VarLength[T]) Cut(n int) { - if i.slc != nil { - *i.slc = (*i.slc)[:n] - } else if i.arr != nil { - i.arr.next = n - } -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/ptr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/ptr.go deleted file mode 100644 index f21e1a5d8..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/ptr.go +++ /dev/null @@ -1,15 +0,0 @@ -package wazevoapi - -import "unsafe" - -// PtrFromUintptr resurrects the original *T from the given uintptr. -// The caller of this function MUST be sure that ptr is valid. -func PtrFromUintptr[T any](ptr uintptr) *T { - // Wraps ptrs as the double pointer in order to avoid the unsafe access as detected by race detector. - // - // For example, if we have (*function)(unsafe.Pointer(ptr)) instead, then the race detector's "checkptr" - // subroutine wanrs as "checkptr: pointer arithmetic result points to invalid allocation" - // https://github.com/golang/go/blob/1ce7fcf139417d618c2730010ede2afb41664211/src/runtime/checkptr.go#L69 - var wrapped *uintptr = &ptr - return *(**T)(unsafe.Pointer(wrapped)) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/queue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/queue.go deleted file mode 100644 index e3118fa69..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/queue.go +++ /dev/null @@ -1,26 +0,0 @@ -package wazevoapi - -// Queue is the resettable queue where the underlying slice is reused. 
-type Queue[T any] struct {
-	index int
-	Data  []T
-}
-
-func (q *Queue[T]) Enqueue(v T) {
-	q.Data = append(q.Data, v)
-}
-
-func (q *Queue[T]) Dequeue() (ret T) {
-	ret = q.Data[q.index]
-	q.index++
-	return
-}
-
-func (q *Queue[T]) Empty() bool {
-	return q.index >= len(q.Data)
-}
-
-func (q *Queue[T]) Reset() {
-	q.index = 0
-	q.Data = q.Data[:0]
-}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/resetmap.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/resetmap.go
deleted file mode 100644
index 3fc7aa143..000000000
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/resetmap.go
+++ /dev/null
@@ -1,11 +0,0 @@
-package wazevoapi
-
-// ResetMap resets the map to an empty state, or creates a new map if it is nil.
-func ResetMap[K comparable, V any](m map[K]V) map[K]V {
-	if m == nil {
-		m = make(map[K]V)
-	} else {
-		clear(m)
-	}
-	return m
-}
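// Illustrative sketch (not from the wazero source): how the Queue and ResetMap helpers
// above behave, written as if inside package wazevoapi. Both exist so the backing storage
// can be reused across compilations instead of being reallocated.
package wazevoapi

import "fmt"

func exampleQueueAndResetMap() {
	var q Queue[int]
	q.Enqueue(10)
	q.Enqueue(20)
	fmt.Println(q.Dequeue(), q.Dequeue(), q.Empty()) // 10 20 true
	q.Reset()                                        // keeps the capacity of q.Data for reuse

	m := map[string]int{"a": 1}
	m = ResetMap(m)     // the same map, now empty (or a fresh map if it had been nil)
	fmt.Println(len(m)) // 0
}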