Diffstat (limited to 'vendor/github.com/tetratelabs/wazero/internal/engine')
23 files changed, 1311 insertions, 654 deletions
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go index 4e20e4b2c..4269d237b 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go @@ -814,6 +814,7 @@ operatorSwitch: c.emit( newOperationCallIndirect(typeIndex, tableIndex), ) + case wasm.OpcodeDrop: r := inclusiveRange{Start: 0, End: 0} if peekValueType == unsignedTypeV128 { @@ -3423,6 +3424,45 @@ operatorSwitch: default: return fmt.Errorf("unsupported atomic instruction in interpreterir: %s", wasm.AtomicInstructionName(atomicOp)) } + + case wasm.OpcodeTailCallReturnCall: + fdef := c.module.FunctionDefinition(index) + functionFrame := c.controlFrames.functionFrame() + // Tail calls to imported functions are currently not supported; we treat them as regular calls. + // For details, see internal/engine/RATIONALE.md + if _, _, isImport := fdef.Import(); isImport { + c.emit(newOperationCall(index)) + dropOp := newOperationDrop(c.getFrameDropRange(functionFrame, false)) + + // Clean up the stack and then jump to the function frame's continuation (i.e., return). + c.emit(dropOp) + c.emit(newOperationBr(functionFrame.asLabel())) + } else { + c.emit(newOperationTailCallReturnCall(index)) + } + + // The return operation is stack-polymorphic, so we mark the state as unreachable. + // That means subsequent instructions in the current control frame are "unreachable" + // and can be safely removed. + c.markUnreachable() + + case wasm.OpcodeTailCallReturnCallIndirect: + typeIndex := index + tableIndex, n, err := leb128.LoadUint32(c.body[c.pc+1:]) + if err != nil { + return fmt.Errorf("read table index for return_call_indirect: %w", err) + } + c.pc += n + + functionFrame := c.controlFrames.functionFrame() + dropRange := c.getFrameDropRange(functionFrame, false) + c.emit(newOperationTailCallReturnCallIndirect(typeIndex, tableIndex, dropRange, functionFrame.asLabel())) + + // The return operation is stack-polymorphic, so we mark the state as unreachable. + // That means subsequent instructions in the current control frame are "unreachable" + // and can be safely removed. + c.markUnreachable() + default: return fmt.Errorf("unsupported instruction in interpreterir: 0x%x", op) } @@ -3449,7 +3489,10 @@ func (c *compiler) applyToStack(opcode wasm.Opcode) (index uint32, err error) { wasm.OpcodeLocalSet, wasm.OpcodeLocalTee, wasm.OpcodeGlobalGet, - wasm.OpcodeGlobalSet: + wasm.OpcodeGlobalSet, + // tail-call proposal + wasm.OpcodeTailCallReturnCall, + wasm.OpcodeTailCallReturnCallIndirect: // Assumes that we are at the opcode now, so skip it before reading the immediates. v, num, err := leb128.LoadUint32(c.body[c.pc+1:]) if err != nil { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go index 5b5e6e9d0..6f2fa949a 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go @@ -7,6 +7,7 @@ import ( "fmt" "math" "math/bits" + "slices" "sync" "unsafe" @@ -27,27 +28,37 @@ import ( // The default value should suffice for most use cases. Those wishing to change this can do so via `go build -ldflags`.
var callStackCeiling = 2000 +type compiledFunctionWithCount struct { + funcs []compiledFunction + refCount int +} + // engine is an interpreter implementation of wasm.Engine type engine struct { enabledFeatures api.CoreFeatures - compiledFunctions map[wasm.ModuleID][]compiledFunction // guarded by mutex. - mux sync.RWMutex + compiledFunctions map[wasm.ModuleID]*compiledFunctionWithCount // guarded by mutex. + mux sync.Mutex } func NewEngine(_ context.Context, enabledFeatures api.CoreFeatures, _ filecache.Cache) wasm.Engine { return &engine{ enabledFeatures: enabledFeatures, - compiledFunctions: map[wasm.ModuleID][]compiledFunction{}, + compiledFunctions: map[wasm.ModuleID]*compiledFunctionWithCount{}, } } // Close implements the same method as documented on wasm.Engine. func (e *engine) Close() (err error) { + e.mux.Lock() + defer e.mux.Unlock() + clear(e.compiledFunctions) return } // CompiledModuleCount implements the same method as documented on wasm.Engine. func (e *engine) CompiledModuleCount() uint32 { + e.mux.Lock() + defer e.mux.Unlock() return uint32(len(e.compiledFunctions)) } @@ -59,19 +70,33 @@ func (e *engine) DeleteCompiledModule(m *wasm.Module) { func (e *engine) deleteCompiledFunctions(module *wasm.Module) { e.mux.Lock() defer e.mux.Unlock() + cf, ok := e.compiledFunctions[module.ID] + if !ok { + return + } + cf.refCount-- + if cf.refCount > 0 { + return + } delete(e.compiledFunctions, module.ID) } func (e *engine) addCompiledFunctions(module *wasm.Module, fs []compiledFunction) { e.mux.Lock() defer e.mux.Unlock() - e.compiledFunctions[module.ID] = fs + e.compiledFunctions[module.ID] = &compiledFunctionWithCount{funcs: fs, refCount: 1} } -func (e *engine) getCompiledFunctions(module *wasm.Module) (fs []compiledFunction, ok bool) { - e.mux.RLock() - defer e.mux.RUnlock() - fs, ok = e.compiledFunctions[module.ID] +func (e *engine) getCompiledFunctions(module *wasm.Module, increaseRefCount bool) (fs []compiledFunction, ok bool) { + e.mux.Lock() + defer e.mux.Unlock() + cf, ok := e.compiledFunctions[module.ID] + if ok { + fs = cf.funcs + if increaseRefCount { + cf.refCount++ + } + } return } @@ -242,15 +267,9 @@ type snapshot struct { // Snapshot implements the same method as documented on experimental.Snapshotter. func (ce *callEngine) Snapshot() experimental.Snapshot { - stack := make([]uint64, len(ce.stack)) - copy(stack, ce.stack) - - frames := make([]*callFrame, len(ce.frames)) - copy(frames, ce.frames) - return &snapshot{ - stack: stack, - frames: frames, + stack: slices.Clone(ce.stack), + frames: slices.Clone(ce.frames), ce: ce, } } @@ -356,7 +375,7 @@ const callFrameStackSize = 0 // CompileModule implements the same method as documented on wasm.Engine. func (e *engine) CompileModule(_ context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) error { - if _, ok := e.getCompiledFunctions(module); ok { // cache hit! + if _, ok := e.getCompiledFunctions(module, true); ok { // cache hit! 
return nil } @@ -405,7 +424,7 @@ func (e *engine) NewModuleEngine(module *wasm.Module, instance *wasm.ModuleInsta functions: make([]function, len(module.FunctionSection)+int(module.ImportFunctionCount)), } - codes, ok := e.getCompiledFunctions(module) + codes, ok := e.getCompiledFunctions(module, false) if !ok { return nil, errors.New("source module must be compiled before instantiation") } @@ -427,12 +446,10 @@ func (e *engine) NewModuleEngine(module *wasm.Module, instance *wasm.ModuleInsta // lowerIR lowers the interpreterir operations to engine friendly struct. func (e *engine) lowerIR(ir *compilationResult, ret *compiledFunction) error { // Copy the body from the result. - ret.body = make([]unionOperation, len(ir.Operations)) - copy(ret.body, ir.Operations) + ret.body = slices.Clone(ir.Operations) // Also copy the offsets if necessary. if offsets := ir.IROperationSourceOffsetsInWasmBinary; len(offsets) > 0 { - ret.offsetsInWasmBinary = make([]uint64, len(offsets)) - copy(ret.offsetsInWasmBinary, offsets) + ret.offsetsInWasmBinary = slices.Clone(offsets) } labelAddressResolutions := [labelKindNum][]uint64{} @@ -449,9 +466,7 @@ func (e *engine) lowerIR(ir *compilationResult, ret *compiledFunction) error { frameToAddresses := labelAddressResolutions[label.Kind()] // Expand the slice if necessary. if diff := fid - len(frameToAddresses) + 1; diff > 0 { - for j := 0; j < diff; j++ { - frameToAddresses = append(frameToAddresses, 0) - } + frameToAddresses = append(frameToAddresses, make([]uint64, diff)...) } frameToAddresses[fid] = address labelAddressResolutions[kind] = frameToAddresses @@ -472,6 +487,8 @@ func (e *engine) lowerIR(ir *compilationResult, ret *compiledFunction) error { target := op.Us[j] e.setLabelAddress(&op.Us[j], label(target), labelAddressResolutions) } + case operationKindTailCallReturnCallIndirect: + e.setLabelAddress(&op.Us[1], label(op.Us[1]), labelAddressResolutions) } } return nil @@ -761,18 +778,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance case operationKindCallIndirect: offset := ce.popValue() table := tables[op.U2] - if offset >= uint64(len(table.References)) { - panic(wasmruntime.ErrRuntimeInvalidTableAccess) - } - rawPtr := table.References[offset] - if rawPtr == 0 { - panic(wasmruntime.ErrRuntimeInvalidTableAccess) - } - - tf := functionFromUintptr(rawPtr) - if tf.typeID != typeIDs[op.U1] { - panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch) - } + tf := ce.functionForOffset(table, offset, typeIDs[op.U1]) ce.callFunction(ctx, f.moduleInstance, tf) frame.pc++ @@ -1725,12 +1731,17 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance if fillSize+offset > uint64(len(memoryInst.Buffer)) { panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess) } else if fillSize != 0 { - // Uses the copy trick for faster filling buffer. - // https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d + // Uses the copy trick for faster filling the buffer with the value. 
+ // https://github.com/golang/go/blob/go1.24.0/src/bytes/bytes.go#L664-L673 buf := memoryInst.Buffer[offset : offset+fillSize] - buf[0] = value - for i := 1; i < len(buf); i *= 2 { - copy(buf[i:], buf[:i]) + if value == 0 { + clear(buf) + } else { + buf[0] = value + for i := 1; i < len(buf); { + chunk := min(i, 8192) + i += copy(buf[i:], buf[:chunk]) + } } } frame.pc++ @@ -1804,7 +1815,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance panic(wasmruntime.ErrRuntimeInvalidTableAccess) } else if num > 0 { // Uses the copy trick for faster filling the region with the value. - // https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d + // https://github.com/golang/go/blob/go1.24.0/src/slices/slices.go#L514-L517 targetRegion := table.References[offset : offset+num] targetRegion[0] = ref for i := 1; i < len(targetRegion); i *= 2 { @@ -4331,6 +4342,32 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance memoryInst.Mux.Unlock() ce.pushValue(uint64(old)) frame.pc++ + case operationKindTailCallReturnCall: + f := &functions[op.U1] + ce.dropForTailCall(frame, f) + body, bodyLen = ce.resetPc(frame, f) + + case operationKindTailCallReturnCallIndirect: + offset := ce.popValue() + table := tables[op.U2] + tf := ce.functionForOffset(table, offset, typeIDs[op.U1]) + + // We are allowing proper tail calls only across functions that belong to the same + // module; for indirect calls, we have to enforce it at run-time. + // For details, see internal/engine/RATIONALE.md + if tf.moduleInstance != f.moduleInstance { + // Revert to a normal call. + ce.callFunction(ctx, f.moduleInstance, tf) + // Return + ce.drop(op.Us[0]) + // Jump to the function frame (return) + frame.pc = op.Us[1] + continue + } + + ce.dropForTailCall(frame, tf) + body, bodyLen = ce.resetPc(frame, tf) + default: frame.pc++ } @@ -4338,6 +4375,40 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance ce.popFrame() } +func (ce *callEngine) dropForTailCall(frame *callFrame, f *function) { + base := frame.base - frame.f.funcType.ParamNumInUint64 + paramCount := f.funcType.ParamNumInUint64 + ce.stack = append(ce.stack[:base], ce.stack[len(ce.stack)-paramCount:]...) +} + +func (ce *callEngine) resetPc(frame *callFrame, f *function) (body []unionOperation, bodyLen uint64) { + // The compiler is currently allowing proper tail call only across functions + // that belong to the same module; thus, we can overwrite the frame in-place. + // For details, see internal/engine/RATIONALE.md + frame.f = f + frame.base = len(ce.stack) + frame.pc = 0 + body = frame.f.parent.body + bodyLen = uint64(len(body)) + return body, bodyLen +} + +func (ce *callEngine) functionForOffset(table *wasm.TableInstance, offset uint64, expectedTypeID wasm.FunctionTypeID) *function { + if offset >= uint64(len(table.References)) { + panic(wasmruntime.ErrRuntimeInvalidTableAccess) + } + rawPtr := table.References[offset] + if rawPtr == 0 { + panic(wasmruntime.ErrRuntimeInvalidTableAccess) + } + + tf := functionFromUintptr(rawPtr) + if tf.typeID != expectedTypeID { + panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch) + } + return tf +} + func wasmCompatMax32bits(v1, v2 uint32) uint64 { return uint64(math.Float32bits(moremath.WasmCompatMax32( math.Float32frombits(v1), @@ -4564,9 +4635,7 @@ func (ce *callEngine) callGoFuncWithStack(ctx context.Context, m *wasm.ModuleIns // In the interpreter engine, ce.stack may only have capacity to store // parameters. 
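The tail-call handling above (dropForTailCall plus resetPc) amounts to a slice shuffle: the caller's entire value region is discarded and the callee's arguments are slid down to where a fresh frame would expect them, so the existing frame is reused instead of a new one being pushed. A standalone sketch of that shuffle, not part of the patch and with illustrative names only:

func tailCallShuffle(stack []uint64, callerBase, callerParamCount, calleeParamCount int) []uint64 {
    // The caller's parameters sit immediately below its frame base.
    base := callerBase - callerParamCount
    // The callee's arguments are the topmost values on the stack.
    args := stack[len(stack)-calleeParamCount:]
    // Drop everything the caller owned and slide the arguments down;
    // the callee's new frame base is then the length of the result, as resetPc assumes.
    return append(stack[:base], args...)
}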
Grow when there are more results than parameters. if growLen := resultLen - paramLen; growLen > 0 { - for i := 0; i < growLen; i++ { - ce.stack = append(ce.stack, 0) - } + ce.stack = append(ce.stack, make([]uint64, growLen)...) stackLen += growLen } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/operations.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/operations.go index 3087a718f..db3cfa250 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/operations.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/operations.go @@ -445,6 +445,10 @@ func (o operationKind) String() (ret string) { ret = "operationKindAtomicRMW8Cmpxchg" case operationKindAtomicRMW16Cmpxchg: ret = "operationKindAtomicRMW16Cmpxchg" + case operationKindTailCallReturnCall: + ret = "operationKindTailCallReturnCall" + case operationKindTailCallReturnCallIndirect: + ret = "operationKindTailCallReturnCallIndirect" default: panic(fmt.Errorf("unknown operation %d", o)) } @@ -768,6 +772,11 @@ const ( // operationKindAtomicRMW16Cmpxchg is the kind for NewOperationAtomicRMW16Cmpxchg. operationKindAtomicRMW16Cmpxchg + // operationKindTailCallReturnCall is the Kind for newOperationTailCallReturnCall. + operationKindTailCallReturnCall + // operationKindTailCallReturnCallIndirect is the Kind for newOperationTailCallReturnCallIndirect. + operationKindTailCallReturnCallIndirect + // operationKindEnd is always placed at the bottom of this iota definition to be used in the test. operationKindEnd ) @@ -1097,6 +1106,12 @@ func (o unionOperation) String() string { operationKindAtomicRMW16Cmpxchg: return o.Kind.String() + case operationKindTailCallReturnCall: + return fmt.Sprintf("%s %d %s", o.Kind, o.U1, label(o.U2).String()) + + case operationKindTailCallReturnCallIndirect: + return fmt.Sprintf("%s %d %d", o.Kind, o.U1, o.U2) + default: panic(fmt.Sprintf("TODO: %v", o.Kind)) } @@ -2810,3 +2825,21 @@ func newOperationAtomicRMW8Cmpxchg(unsignedType unsignedType, arg memoryArg) uni func newOperationAtomicRMW16Cmpxchg(unsignedType unsignedType, arg memoryArg) unionOperation { return unionOperation{Kind: operationKindAtomicRMW16Cmpxchg, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)} } + +// newOperationTailCallReturnCall is a constructor for unionOperation with operationKindTailCallReturnCall. +// +// This corresponds to +// +// wasm.OpcodeTailCallReturnCall. +func newOperationTailCallReturnCall(functionIndex uint32) unionOperation { + return unionOperation{Kind: operationKindTailCallReturnCall, U1: uint64(functionIndex)} +} + +// newOperationTailCallReturnCallIndirect is a constructor for unionOperation with operationKindTailCallReturnCallIndirect. +// +// This corresponds to +// +// wasm.OpcodeTailCallReturnCallIndirect.
+func newOperationTailCallReturnCallIndirect(typeIndex, tableIndex uint32, dropDepth inclusiveRange, l label) unionOperation { + return unionOperation{Kind: operationKindTailCallReturnCallIndirect, U1: uint64(typeIndex), U2: uint64(tableIndex), Us: []uint64{dropDepth.AsU64(), uint64(l)}} +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/signature.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/signature.go index 7b9d5602d..da5ca3c15 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/signature.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/signature.go @@ -272,9 +272,9 @@ func (c *compiler) wasmOpcodeSignature(op wasm.Opcode, index uint32) (*signature return signature_I32_None, nil case wasm.OpcodeReturn: return signature_None_None, nil - case wasm.OpcodeCall: + case wasm.OpcodeCall, wasm.OpcodeTailCallReturnCall: return c.funcTypeToSigs.get(c.funcs[index], false /* direct */), nil - case wasm.OpcodeCallIndirect: + case wasm.OpcodeCallIndirect, wasm.OpcodeTailCallReturnCallIndirect: return c.funcTypeToSigs.get(index, true /* call_indirect */), nil case wasm.OpcodeDrop: return signature_Unknown_None, nil diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go index 62d365015..8e3f08efc 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go @@ -88,7 +88,7 @@ type Compiler interface { MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode // AddRelocationInfo appends the relocation information for the function reference at the current buffer offset. - AddRelocationInfo(funcRef ssa.FuncRef) + AddRelocationInfo(funcRef ssa.FuncRef, isTailCall bool) // AddSourceOffsetInfo appends the source offset information for the given offset. AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset) @@ -115,6 +115,8 @@ type RelocationInfo struct { Offset int64 // Target is the target function of the call instruction. FuncRef ssa.FuncRef + // IsTailCall indicates whether the call instruction is a tail call. + IsTailCall bool } // compiler implements Compiler. @@ -352,10 +354,11 @@ func (c *compiler) SourceOffsetInfo() []SourceOffsetInfo { } // AddRelocationInfo implements Compiler.AddRelocationInfo. -func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef) { +func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef, isTailCall bool) { c.relocations = append(c.relocations, RelocationInfo{ - Offset: int64(len(c.buf)), - FuncRef: funcRef, + Offset: int64(len(c.buf)), + FuncRef: funcRef, + IsTailCall: isTailCall, }) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go index 6a3e58f51..901c87aaf 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go @@ -21,7 +21,9 @@ type instruction struct { func (i *instruction) IsCall() bool { return i.kind == call } // IsIndirectCall implements regalloc.Instr. 
-func (i *instruction) IsIndirectCall() bool { return i.kind == callIndirect } +func (i *instruction) IsIndirectCall() bool { + return i.kind == callIndirect +} // IsReturn implements regalloc.Instr. func (i *instruction) IsReturn() bool { return i.kind == ret } @@ -288,6 +290,11 @@ func (i *instruction) String() string { case nopUseReg: return fmt.Sprintf("nop_use_reg %s", i.op1.format(true)) + case tailCall: + return fmt.Sprintf("tailCall %s", ssa.FuncRef(i.u1)) + case tailCallIndirect: + return fmt.Sprintf("tailCallIndirect %s", i.op1.format(true)) + default: panic(fmt.Sprintf("BUG: %d", int(i.kind))) } @@ -357,7 +364,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { default: panic(fmt.Sprintf("BUG: invalid operand: %s", i)) } - case useKindCallInd: + case useKindCallInd, useKindTailCallInd: op := i.op1 switch op.kind { case operandKindReg: @@ -428,13 +435,16 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { func (i *instruction) AssignUse(index int, v regalloc.VReg) { switch uk := useKinds[i.kind]; uk { case useKindNone: - case useKindCallInd: + case useKindCallInd, useKindTailCallInd: if index != 0 { panic("BUG") } op := &i.op1 switch op.kind { case operandKindReg: + if uk == useKindTailCallInd && v != r11VReg { + panic("BUG") + } op.setReg(v) case operandKindMem: op.addressMode().assignUses(index, v) @@ -838,6 +848,12 @@ const ( // nopUseReg is a meta instruction that uses one register and does nothing. nopUseReg + // tailCall is a meta instruction that emits a tail call. + tailCall + + // tailCallIndirect is a meta instruction that emits a tail call with an indirect call. + tailCallIndirect + instrMax ) @@ -1079,6 +1095,10 @@ func (k instructionKind) String() string { return "lockcmpxchg" case lockxadd: return "lockxadd" + case tailCall: + return "tailCall" + case tailCallIndirect: + return "tailCallIndirect" default: panic("BUG") } @@ -1173,6 +1193,27 @@ func (i *instruction) asCallIndirect(ptr operand, abi *backend.FunctionABI) *ins return i } +func (i *instruction) asTailCallReturnCall(ref ssa.FuncRef, abi *backend.FunctionABI) *instruction { + i.kind = tailCall + i.u1 = uint64(ref) + if abi != nil { + i.u2 = abi.ABIInfoAsUint64() + } + return i +} + +func (i *instruction) asTailCallReturnCallIndirect(ptr operand, abi *backend.FunctionABI) *instruction { + if ptr.kind != operandKindReg && ptr.kind != operandKindMem { + panic("BUG") + } + i.kind = tailCallIndirect + i.op1 = ptr + if abi != nil { + i.u2 = abi.ABIInfoAsUint64() + } + return i +} + func (i *instruction) asRet() *instruction { i.kind = ret return i @@ -2342,6 +2383,8 @@ var defKinds = [instrMax]defKind{ lockxadd: defKindNone, neg: defKindNone, nopUseReg: defKindNone, + tailCall: defKindCall, + tailCallIndirect: defKindCall, } // String implements fmt.Stringer. 
@@ -2375,6 +2418,7 @@ const ( useKindBlendvpd useKindCall useKindCallInd + useKindTailCallInd useKindFcvtToSintSequence useKindFcvtToUintSequence ) @@ -2425,6 +2469,8 @@ var useKinds = [instrMax]useKind{ lockxadd: useKindOp1RegOp2, neg: useKindOp1, nopUseReg: useKindOp1, + tailCall: useKindCall, + tailCallIndirect: useKindTailCallInd, } func (u useKind) String() string { @@ -2441,6 +2487,8 @@ func (u useKind) String() string { return "call" case useKindCallInd: return "callInd" + case useKindTailCallInd: + return "tailCallInd" default: return "invalid" } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go index 6637b428c..d1eefbdb5 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go @@ -1211,7 +1211,7 @@ func (i *instruction) encode(c backend.Compiler) (needsLabelResolution bool) { case call: c.EmitByte(0xe8) // Meaning that the call target is a function value, and requires relocation. - c.AddRelocationInfo(ssa.FuncRef(i.u1)) + c.AddRelocationInfo(ssa.FuncRef(i.u1), false) // Note that this is zero as a placeholder for the call target if it's a function value. c.Emit4Bytes(uint32(i.u2)) @@ -1244,6 +1244,37 @@ func (i *instruction) encode(c backend.Compiler) (needsLabelResolution bool) { panic("BUG: invalid operand kind") } + case tailCall: + // Encode as jmp. + c.EmitByte(0xe9) + // Meaning that the call target is a function value, and requires relocation. + c.AddRelocationInfo(ssa.FuncRef(i.u1), true) + // Note that this is zero as a placeholder for the call target if it's a function value. + c.Emit4Bytes(uint32(i.u2)) + + case tailCallIndirect: + op := i.op1 + + const opcodeNum = 1 + const opcode = 0xff + const regMemSubOpcode = 4 + rex := rexInfo(0).clearW() + switch op.kind { + // Indirect tail calls always take a register as the target. + // Note: the register should be a caller-saved register (usually r11). + case operandKindReg: + dst := regEncodings[op.reg().RealReg()] + encodeRegReg(c, + legacyPrefixesNone, + opcode, opcodeNum, + regMemSubOpcode, + dst, + rex, + ) + default: + panic("BUG: invalid operand kind") + } + case xchg: src, dst := regEncodings[i.op1.reg().RealReg()], i.op2 size := i.u1 diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go index fd0d69ca9..57d9bb731 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go @@ -17,7 +17,7 @@ import ( // NewBackend returns a new backend for amd64.
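For context on the amd64 encoding above: in standard x86-64, a direct call is E8 rel32 and its tail-call counterpart is the near jump E9 rel32, while the indirect forms share the FF opcode group and differ only in the ModRM /reg field (/2 for call, /4 for jmp). A hypothetical helper, not project code, summarizing that relationship:

// x86BranchOpcode returns the primary opcode byte and, for the FF group,
// the ModRM /reg sub-opcode that distinguishes call from jmp.
func x86BranchOpcode(indirect, tail bool) (opcode byte, subOpcode byte) {
    switch {
    case !indirect && !tail:
        return 0xE8, 0 // call rel32
    case !indirect && tail:
        return 0xE9, 0 // jmp rel32 (direct tail call)
    case indirect && !tail:
        return 0xFF, 2 // call r/m64
    default:
        return 0xFF, 4 // jmp r/m64 (indirect tail call)
    }
}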
func NewBackend() backend.Machine { m := &machine{ - cpuFeatures: platform.CpuFeatures, + cpuFeatures: platform.CpuFeatures(), regAlloc: regalloc.NewAllocator[*instruction, *labelPosition, *regAllocFn](regInfo), spillSlots: map[regalloc.VRegID]int64{}, amodePool: wazevoapi.NewPool[amode](nil), @@ -1109,6 +1109,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { atomicOp, size := instr.AtomicRmwData() m.lowerAtomicRmw(atomicOp, addr, val, size, instr.Return()) + case ssa.OpcodeTailCallReturnCall, ssa.OpcodeTailCallReturnCallIndirect: + m.lowerTailCall(instr) + default: panic("TODO: lowering " + op.String()) } @@ -1885,31 +1888,7 @@ func (m *machine) lowerStore(si *ssa.Instruction) { func (m *machine) lowerCall(si *ssa.Instruction) { isDirectCall := si.Opcode() == ssa.OpcodeCall - var indirectCalleePtr ssa.Value - var directCallee ssa.FuncRef - var sigID ssa.SignatureID - var args []ssa.Value - var isMemmove bool - if isDirectCall { - directCallee, sigID, args = si.CallData() - } else { - indirectCalleePtr, sigID, args, isMemmove = si.CallIndirectData() - } - calleeABI := m.c.GetFunctionABI(m.c.SSABuilder().ResolveSignature(sigID)) - - stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize()) - if m.maxRequiredStackSizeForCalls < stackSlotSize+16 { - m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // 16 == return address + RBP. - } - - // Note: See machine.SetupPrologue for the stack layout. - // The stack pointer decrease/increase will be inserted later in the compilation. - - for i, arg := range args { - reg := m.c.VRegOf(arg) - def := m.c.ValueDefinition(arg) - m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize) - } + indirectCalleePtr, directCallee, isMemmove, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall) if isMemmove { // Go's memmove *might* use all xmm0-xmm15, so we need to release them. @@ -1939,6 +1918,39 @@ func (m *machine) lowerCall(si *ssa.Instruction) { m.insert(m.allocateInstr().asNopUseReg(regInfo.RealRegToVReg[rdx])) } + m.insertReturns(si, calleeABI, stackSlotSize) +} + +func (m *machine) prepareCall(si *ssa.Instruction, isDirectCall bool) (ssa.Value, ssa.FuncRef, bool, *backend.FunctionABI, int64) { + var indirectCalleePtr ssa.Value + var directCallee ssa.FuncRef + var sigID ssa.SignatureID + var args []ssa.Value + var isMemmove bool + if isDirectCall { + directCallee, sigID, args = si.CallData() + } else { + indirectCalleePtr, sigID, args, isMemmove = si.CallIndirectData() + } + calleeABI := m.c.GetFunctionABI(m.c.SSABuilder().ResolveSignature(sigID)) + + stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize()) + if m.maxRequiredStackSizeForCalls < stackSlotSize+16 { + m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // 16 == return address + RBP. + } + + // Note: See machine.SetupPrologue for the stack layout. + // The stack pointer decrease/increase will be inserted later in the compilation. 
+ + for i, arg := range args { + reg := m.c.VRegOf(arg) + def := m.c.ValueDefinition(arg) + m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize) + } + return indirectCalleePtr, directCallee, isMemmove, calleeABI, stackSlotSize +} + +func (m *machine) insertReturns(si *ssa.Instruction, calleeABI *backend.FunctionABI, stackSlotSize int64) { var index int r1, rs := si.Returns() if r1.Valid() { @@ -1952,6 +1964,43 @@ func (m *machine) lowerCall(si *ssa.Instruction) { } } +func (m *machine) lowerTailCall(si *ssa.Instruction) { + isDirectCall := si.Opcode() == ssa.OpcodeTailCallReturnCall + indirectCalleePtr, directCallee, isMemmove, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall) + if isMemmove { + panic("memmove not supported in tail calls") + } + + isAllRegs := stackSlotSize == 0 + + switch { + case isDirectCall && isAllRegs: + call := m.allocateInstr().asTailCallReturnCall(directCallee, calleeABI) + m.insert(call) + case !isDirectCall && isAllRegs: + // In a tail call we insert the epilogue before the jump instruction, + // so an arbitrary register might be overwritten while restoring the stack. + // So, as compared to a regular indirect call, we ensure the pointer is stored + // in a caller-saved register (r11). + // For details, see internal/engine/RATIONALE.md + ptrOp := m.getOperand_Reg(m.c.ValueDefinition(indirectCalleePtr)) + tmpJmp := r11VReg + m.InsertMove(tmpJmp, ptrOp.reg(), ssa.TypeI64) + callInd := m.allocateInstr().asTailCallReturnCallIndirect(newOperandReg(tmpJmp), calleeABI) + m.insert(callInd) + case isDirectCall && !isAllRegs: + call := m.allocateInstr().asCall(directCallee, calleeABI) + m.insert(call) + case !isDirectCall && !isAllRegs: + ptrOp := m.getOperand_Mem_Reg(m.c.ValueDefinition(indirectCalleePtr)) + callInd := m.allocateInstr().asCallIndirect(ptrOp, calleeABI) + m.insert(callInd) + } + + // If this is a proper tail call, returns will be cleared in the postRegAlloc phase. + m.insertReturns(si, calleeABI, stackSlotSize) +} + // callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the // caller side of the function call. func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def backend.SSAValueDefinition, stackSlotSize int64) { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go index e53729860..fa3ca58a6 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go @@ -188,6 +188,23 @@ func (m *machine) postRegAlloc() { linkInstr(inc, next) } continue + case tailCall, tailCallIndirect: + // At this point, reg alloc is done, therefore we can safely insert dec RPS instruction + // right before the tail call (jump) instruction. If this is done before reg alloc, the stack slot + // can point to the wrong location and therefore results in a wrong value. + tailCall := cur + _, _, _, _, size := backend.ABIInfoFromUint64(tailCall.u2) + if size > 0 { + dec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(size), rspVReg, true) + linkInstr(tailCall.prev, dec) + linkInstr(dec, tailCall) + } + // In a tail call, we insert the epilogue before the jump instruction. 
+ m.setupEpilogueAfter(tailCall.prev) + // If this has been encoded as a proper tail call, we can remove the trailing instructions + // For details, see internal/engine/RATIONALE.md + m.removeUntilRet(cur.next) + continue } // Removes the redundant copy instruction. @@ -278,6 +295,20 @@ func (m *machine) setupEpilogueAfter(cur *instruction) { linkInstr(cur, prevNext) } +// removeUntilRet removes the instructions starting from `cur` until the first `ret` instruction. +func (m *machine) removeUntilRet(cur *instruction) { + for ; cur != nil; cur = cur.next { + prev, next := cur.prev, cur.next + prev.next = next + if next != nil { + next.prev = prev + } + if cur.kind == ret { + return + } + } +} + func (m *machine) addRSP(offset int32, cur *instruction) *instruction { if offset == 0 { return cur diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go index d1eaa7cd4..c300c3d61 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go @@ -261,6 +261,23 @@ func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn reg func (m *machine) lowerCall(si *ssa.Instruction) { isDirectCall := si.Opcode() == ssa.OpcodeCall + indirectCalleePtr, directCallee, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall) + + if isDirectCall { + call := m.allocateInstr() + call.asCall(directCallee, calleeABI) + m.insert(call) + } else { + ptr := m.compiler.VRegOf(indirectCalleePtr) + callInd := m.allocateInstr() + callInd.asCallIndirect(ptr, calleeABI) + m.insert(callInd) + } + + m.insertReturns(si, calleeABI, stackSlotSize) +} + +func (m *machine) prepareCall(si *ssa.Instruction, isDirectCall bool) (ssa.Value, ssa.FuncRef, *backend.FunctionABI, int64) { var indirectCalleePtr ssa.Value var directCallee ssa.FuncRef var sigID ssa.SignatureID @@ -282,18 +299,10 @@ func (m *machine) lowerCall(si *ssa.Instruction) { def := m.compiler.ValueDefinition(arg) m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize) } + return indirectCalleePtr, directCallee, calleeABI, stackSlotSize +} - if isDirectCall { - call := m.allocateInstr() - call.asCall(directCallee, calleeABI) - m.insert(call) - } else { - ptr := m.compiler.VRegOf(indirectCalleePtr) - callInd := m.allocateInstr() - callInd.asCallIndirect(ptr, calleeABI) - m.insert(callInd) - } - +func (m *machine) insertReturns(si *ssa.Instruction, calleeABI *backend.FunctionABI, stackSlotSize int64) { var index int r1, rs := si.Returns() if r1.Valid() { @@ -307,6 +316,40 @@ func (m *machine) lowerCall(si *ssa.Instruction) { } } +func (m *machine) lowerTailCall(si *ssa.Instruction) { + isDirectCall := si.Opcode() == ssa.OpcodeTailCallReturnCall + indirectCalleePtr, directCallee, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall) + + // We currently support tail calls only when the args are passed via registers + // otherwise we fall back to a plain call. 
+ // For details, see internal/engine/RATIONALE.md + isAllRegs := stackSlotSize == 0 + + switch { + case isDirectCall && isAllRegs: + tailJump := m.allocateInstr() + tailJump.asTailCall(directCallee, calleeABI) + m.insert(tailJump) + case !isDirectCall && isAllRegs: + ptr := m.compiler.VRegOf(indirectCalleePtr) + callInd := m.allocateInstr() + callInd.asTailCallIndirect(ptr, calleeABI) + m.insert(callInd) + case isDirectCall && !isAllRegs: + tailJump := m.allocateInstr() + tailJump.asCall(directCallee, calleeABI) + m.insert(tailJump) + case !isDirectCall && !isAllRegs: + ptr := m.compiler.VRegOf(indirectCalleePtr) + callInd := m.allocateInstr() + callInd.asCallIndirect(ptr, calleeABI) + m.insert(callInd) + } + + // If this is a proper tail call, returns will be cleared in the postRegAlloc phase. + m.insertReturns(si, calleeABI, stackSlotSize) +} + func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add bool) { if imm12Operand, ok := asImm12Operand(uint64(diff)); ok { alu := m.allocateInstr() diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go index 1f563428a..560044673 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -140,6 +140,8 @@ var defKinds = [numInstructionKinds]defKind{ atomicStore: defKindNone, dmb: defKindNone, loadConstBlockArg: defKindRD, + tailCall: defKindCall, + tailCallInd: defKindCall, } // Defs returns the list of regalloc.VReg that are defined by the instruction. @@ -278,6 +280,8 @@ var useKinds = [numInstructionKinds]useKind{ atomicStore: useKindRNRM, loadConstBlockArg: useKindNone, dmb: useKindNone, + tailCall: useKindCall, + tailCallInd: useKindCallInd, } // Uses returns the list of regalloc.VReg that are used by the instruction. @@ -1501,6 +1505,10 @@ func (i *instruction) String() (str string) { str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64)) case dmb: str = "dmb" + case tailCall: + str = fmt.Sprintf("b %s", ssa.FuncRef(i.u1)) + case tailCallInd: + str = fmt.Sprintf("b %s", formatVRegSized(i.rn.nr(), 64)) case udf: str = "udf" case emitSourceOffsetInfo: @@ -1550,6 +1558,22 @@ func (i *instruction) asDMB() { i.kind = dmb } +func (i *instruction) asTailCall(ref ssa.FuncRef, abi *backend.FunctionABI) { + i.kind = tailCall + i.u1 = uint64(ref) + if abi != nil { + i.u2 = abi.ABIInfoAsUint64() + } +} + +func (i *instruction) asTailCallIndirect(ptr regalloc.VReg, abi *backend.FunctionABI) { + i.kind = tailCallInd + i.rn = operandNR(ptr) + if abi != nil { + i.u2 = abi.ABIInfoAsUint64() + } +} + // TODO: delete unnecessary things. const ( // nop0 represents a no-op of zero size. @@ -1727,6 +1751,10 @@ const ( atomicStore // dmb represents the data memory barrier instruction in inner-shareable (ish) mode. dmb + // tailCall represents a tail call instruction. + tailCall + // tailCallInd represents a tail call indirect instruction. + tailCallInd // UDF is the undefined instruction. For debugging only. udf // loadConstBlockArg represents a load of a constant block argument. 
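Both backends apply the same decision tree when lowering return_call/return_call_indirect: a proper tail call (a branch that reuses the caller's frame) is emitted only when every argument is passed in registers; otherwise the instruction degrades to an ordinary call and the normal return path is kept. A condensed restatement of that switch, as a sketch with hypothetical names:

func lowerReturnCall(isDirect bool, stackArgSlotSize int64) string {
    allRegs := stackArgSlotSize == 0 // no stack-passed arguments or results
    switch {
    case isDirect && allRegs:
        return "tailCall" // direct branch; the epilogue is inserted before it in postRegAlloc
    case !isDirect && allRegs:
        return "tailCallIndirect" // branch through a scratch register (r11 on amd64)
    case isDirect:
        return "call" // fall back to a plain direct call
    default:
        return "callIndirect" // fall back to a plain indirect call
    }
}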
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go index 21be9b71e..5326a5e28 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go @@ -39,7 +39,7 @@ func (i *instruction) encode(m *machine) { c.Emit4Bytes(encodeUnconditionalBranch(false, imm)) case call: // We still don't know the exact address of the function to call, so we emit a placeholder. - c.AddRelocationInfo(i.callFuncRef()) + c.AddRelocationInfo(i.callFuncRef(), false) c.Emit4Bytes(encodeUnconditionalBranch(true, 0)) // 0 = placeholder case callInd: c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true)) @@ -417,6 +417,12 @@ func (i *instruction) encode(m *machine) { )) case dmb: c.Emit4Bytes(encodeDMB()) + case tailCall: + // We still don't know the exact address of the function to call, so we emit a placeholder. + c.AddRelocationInfo(i.callFuncRef(), true) // true = IsTailCall + c.Emit4Bytes(encodeUnconditionalBranch(false, 0)) // 0 = placeholder + case tailCallInd: + c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], false)) default: panic(i.String()) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index f9df356c0..190bc6014 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -788,6 +788,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { instr.asDMB() m.insert(instr) + case ssa.OpcodeTailCallReturnCall, ssa.OpcodeTailCallReturnCallIndirect: + m.lowerTailCall(instr) + default: panic("TODO: lowering " + op.String()) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go index c646a8fab..16d0746e5 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go @@ -198,6 +198,11 @@ func (m *machine) postRegAlloc() { switch cur.kind { case ret: m.setupEpilogueAfter(cur.prev) + case tailCall, tailCallInd: + m.setupEpilogueAfter(cur.prev) + // If this has been encoded as a proper tail call, we can remove the trailing instructions. + // For details, see internal/engine/RATIONALE.md + m.removeUntilRet(cur.next) case loadConstBlockArg: lc := cur next := lc.next @@ -325,6 +330,20 @@ func (m *machine) setupEpilogueAfter(cur *instruction) { linkInstr(cur, prevNext) } +// removeUntilRet removes the instructions starting from `cur` until the first `ret` instruction. +func (m *machine) removeUntilRet(cur *instruction) { + for ; cur != nil; cur = cur.next { + prev, next := cur.prev, cur.next + prev.next = next + if next != nil { + next.prev = prev + } + if cur.kind == ret { + return + } + } +} + // saveRequiredRegs is the set of registers that must be saved/restored during growing stack when there's insufficient // stack space left. 
Basically this is the combination of CalleeSavedRegisters plus argument registers execpt for x0, // which always points to the execution context whenever the native code is entered from Go. diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go index 932fe842b..9bb4dee15 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go @@ -59,13 +59,19 @@ func (m *machine) ResolveRelocations( if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset { // Find the near trampoline island from callTrampolineIslandOffsets. islandOffset := searchTrampolineIsland(callTrampolineIslandOffsets, int(instrOffset)) - islandTargetOffset := islandOffset + trampolineCallSize*int(r.FuncRef) + // Imported functions don't need trampolines, so we ignore them when we compute the offset + // (see also encodeCallTrampolineIsland) + funcOffset := int(r.FuncRef) - importedFns + islandTargetOffset := islandOffset + trampolineCallSize*funcOffset diff = int64(islandTargetOffset) - (instrOffset) if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset { panic("BUG in trampoline placement") } } - binary.LittleEndian.PutUint32(executable[instrOffset:instrOffset+4], encodeUnconditionalBranch(true, diff)) + // The unconditional branch instruction is usually encoded as a branch-and-link (BL), + // because it is a function call. However, if the instruction is a tail call, + // we encode it as a plain unconditional branch (B), so we won't overwrite the link register. + binary.LittleEndian.PutUint32(executable[instrOffset:instrOffset+4], encodeUnconditionalBranch(!r.IsTailCall, diff)) } } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine.go index a6df3e7e7..a603dbdd7 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine.go @@ -6,8 +6,10 @@ import ( "errors" "fmt" "runtime" + "slices" "sort" "sync" + "sync/atomic" "unsafe" "github.com/tetratelabs/wazero/api" @@ -23,11 +25,15 @@ import ( ) type ( + compiledModuleWithCount struct { + *compiledModule + refCount int + } // engine implements wasm.Engine. engine struct { wazeroVersion string fileCache filecache.Cache - compiledModules map[wasm.ModuleID]*compiledModule + compiledModules map[wasm.ModuleID]*compiledModuleWithCount // sortedCompiledModules is a list of compiled modules sorted by the initial address of the executable. sortedCompiledModules []*compiledModule mux sync.RWMutex @@ -42,25 +48,32 @@ type ( } sharedFunctions struct { - // memoryGrowExecutable is a compiled trampoline executable for memory.grow builtin function. - memoryGrowExecutable []byte - // checkModuleExitCode is a compiled trampoline executable for checking module instance exit code. This - // is used when ensureTermination is true. - checkModuleExitCode []byte - // stackGrowExecutable is a compiled executable for growing stack builtin function. - stackGrowExecutable []byte - // tableGrowExecutable is a compiled trampoline executable for table.grow builtin function. 
- tableGrowExecutable []byte - // refFuncExecutable is a compiled trampoline executable for ref.func builtin function. - refFuncExecutable []byte - // memoryWait32Executable is a compiled trampoline executable for memory.wait32 builtin function - memoryWait32Executable []byte - // memoryWait64Executable is a compiled trampoline executable for memory.wait64 builtin function - memoryWait64Executable []byte - // memoryNotifyExecutable is a compiled trampoline executable for memory.notify builtin function - memoryNotifyExecutable []byte - listenerBeforeTrampolines map[*wasm.FunctionType][]byte - listenerAfterTrampolines map[*wasm.FunctionType][]byte + // The compiled trampolines executable. + executable []byte + // memoryGrowAddress is the address of memory.grow builtin function. + memoryGrowAddress *byte + // checkModuleExitCodeAddress is the address of checking module instance exit code. + // This is used when ensureTermination is true. + checkModuleExitCodeAddress *byte + // stackGrowAddress is the address of growing stack builtin function. + stackGrowAddress *byte + // tableGrowAddress is the address of table.grow builtin function. + tableGrowAddress *byte + // refFuncAddress is the address of ref.func builtin function. + refFuncAddress *byte + // memoryWait32Address is the address of memory.wait32 builtin function + memoryWait32Address *byte + // memoryWait64Address is the address of memory.wait64 builtin function + memoryWait64Address *byte + // memoryNotifyAddress is the address of memory.notify builtin function + memoryNotifyAddress *byte + listenerTrampolines listenerTrampolines + } + + listenerTrampolines = map[*wasm.FunctionType]struct { + executable []byte + before *byte + after *byte } // compiledModule is a compiled variant of a wasm.Module and ready to be used for instantiation. @@ -83,8 +96,9 @@ type ( } executables struct { - executable []byte - entryPreambles [][]byte + executable []byte + entryPreambles []byte + entryPreamblesPtrs []*byte } ) @@ -105,7 +119,7 @@ func NewEngine(ctx context.Context, _ api.CoreFeatures, fc filecache.Cache) wasm machine := newMachine() be := backend.NewCompiler(ctx, machine, ssa.NewBuilder()) e := &engine{ - compiledModules: make(map[wasm.ModuleID]*compiledModule), + compiledModules: make(map[wasm.ModuleID]*compiledModuleWithCount), setFinalizer: runtime.SetFinalizer, machine: machine, be: be, @@ -164,23 +178,46 @@ func (e *engine) CompileModule(ctx context.Context, module *wasm.Module, listene } func (exec *executables) compileEntryPreambles(m *wasm.Module, machine backend.Machine, be backend.Compiler) { - exec.entryPreambles = make([][]byte, len(m.TypeSection)) - for i := range m.TypeSection { + if len(m.TypeSection) == 0 { + return + } + + var preambles []byte + sizes := make([]int, len(m.TypeSection)) + + for i := range sizes { typ := &m.TypeSection[i] sig := frontend.SignatureForWasmFunctionType(typ) be.Init() buf := machine.CompileEntryPreamble(&sig) - executable := mmapExecutable(buf) - exec.entryPreambles[i] = executable + preambles = append(preambles, buf...) + align := 15 & -len(preambles) // Align 16-bytes boundary. + preambles = append(preambles, make([]byte, align)...) 
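The `15 & -len(preambles)` expression above is the usual power-of-two padding trick: under two's complement, 15 & -n equals (16 - n%16) % 16, i.e. the number of bytes needed to reach the next 16-byte boundary. A standalone illustration (hypothetical helper name):

// padTo16 returns how many zero bytes must be appended so that n becomes a
// multiple of 16; e.g. padTo16(1) == 15, padTo16(16) == 0, padTo16(20) == 12.
func padTo16(n int) int { return 15 & -n }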
+ sizes[i] = len(buf) + align + } + + exec.entryPreambles = mmapExecutable(preambles) + exec.entryPreamblesPtrs = make([]*byte, len(sizes)) + + offset := 0 + for i, size := range sizes { + ptr := &exec.entryPreambles[offset] + exec.entryPreamblesPtrs[i] = ptr + offset += size if wazevoapi.PerfMapEnabled { - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&executable[0])), - uint64(len(executable)), fmt.Sprintf("entry_preamble::type=%s", typ.String())) + typ := &m.TypeSection[i] + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(ptr)), + uint64(size), fmt.Sprintf("entry_preamble::type=%s", typ.String())) } } } func (e *engine) compileModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (*compiledModule, error) { + if module.IsHostModule { + return e.compileHostModule(ctx, module, listeners) + } + withListener := len(listeners) > 0 cm := &compiledModule{ offsets: wazevoapi.NewModuleContextOffsetData(module, withListener), parent: e, module: module, @@ -188,116 +225,137 @@ func (e *engine) compileModule(ctx context.Context, module *wasm.Module, listene executables: &executables{}, } - if module.IsHostModule { - return e.compileHostModule(ctx, module, listeners) - } - importedFns, localFns := int(module.ImportFunctionCount), len(module.FunctionSection) if localFns == 0 { return cm, nil } - rels := make([]backend.RelocationInfo, 0) - refToBinaryOffset := make([]int, importedFns+localFns) - - if wazevoapi.DeterministicCompilationVerifierEnabled { - // The compilation must be deterministic regardless of the order of functions being compiled. - wazevoapi.DeterministicCompilationVerifierRandomizeIndexes(ctx) + machine := newMachine() + relocator, err := newEngineRelocator(machine, importedFns, localFns) + if err != nil { + return nil, err } needSourceInfo := module.DWARFLines != nil - // Creates new compiler instances which are reused for each function. ssaBuilder := ssa.NewBuilder() - fe := frontend.NewFrontendCompiler(module, ssaBuilder, &cm.offsets, ensureTermination, withListener, needSourceInfo) - machine := newMachine() be := backend.NewCompiler(ctx, machine, ssaBuilder) - cm.executables.compileEntryPreambles(module, machine, be) - - totalSize := 0 // Total binary size of the executable. cm.functionOffsets = make([]int, localFns) - bodies := make([][]byte, localFns) - // Trampoline relocation related variables. - trampolineInterval, callTrampolineIslandSize, err := machine.CallTrampolineIslandInfo(localFns) - if err != nil { - return nil, err + var indexes []int + if wazevoapi.DeterministicCompilationVerifierEnabled { + // The compilation must be deterministic regardless of the order of functions being compiled. + indexes = wazevoapi.DeterministicCompilationVerifierRandomizeIndexes(ctx) } - needCallTrampoline := callTrampolineIslandSize > 0 - var callTrampolineIslandOffsets []int // Holds the offsets of trampoline islands. - for i := range module.CodeSection { - if wazevoapi.DeterministicCompilationVerifierEnabled { - i = wazevoapi.DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex(ctx, i) - } + if workers := experimental.GetCompilationWorkers(ctx); workers <= 1 { + // Compile with a single goroutine. 
+ fe := frontend.NewFrontendCompiler(module, ssaBuilder, &cm.offsets, ensureTermination, withListener, needSourceInfo) - fidx := wasm.Index(i + importedFns) - - if wazevoapi.NeedFunctionNameInContext { - def := module.FunctionDefinition(fidx) - name := def.DebugName() - if len(def.ExportNames()) > 0 { - name = def.ExportNames()[0] + for i := range module.CodeSection { + if wazevoapi.DeterministicCompilationVerifierEnabled { + i = indexes[i] } - ctx = wazevoapi.SetCurrentFunctionName(ctx, i, fmt.Sprintf("[%d/%d]%s", i, len(module.CodeSection)-1, name)) - } - - needListener := len(listeners) > 0 && listeners[i] != nil - body, relsPerFunc, err := e.compileLocalWasmFunction(ctx, module, wasm.Index(i), fe, ssaBuilder, be, needListener) - if err != nil { - return nil, fmt.Errorf("compile function %d/%d: %v", i, len(module.CodeSection)-1, err) - } - // Align 16-bytes boundary. - totalSize = (totalSize + 15) &^ 15 - cm.functionOffsets[i] = totalSize - - if needSourceInfo { - // At the beginning of the function, we add the offset of the function body so that - // we can resolve the source location of the call site of before listener call. - cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(totalSize)) - cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, module.CodeSection[i].BodyOffsetInCodeSection) + fidx := wasm.Index(i + importedFns) + fctx := functionContext(ctx, module, i, fidx) - for _, info := range be.SourceOffsetInfo() { - cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(totalSize)+uintptr(info.ExecutableOffset)) - cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, uint64(info.SourceOffset)) + needListener := len(listeners) > i && listeners[i] != nil + body, relsPerFunc, err := e.compileLocalWasmFunction(fctx, module, wasm.Index(i), fe, ssaBuilder, be, needListener) + if err != nil { + return nil, fmt.Errorf("compile function %d/%d: %v", i, len(module.CodeSection)-1, err) } - } - fref := frontend.FunctionIndexToFuncRef(fidx) - refToBinaryOffset[fref] = totalSize - - // At this point, relocation offsets are relative to the start of the function body, - // so we adjust it to the start of the executable. - for _, r := range relsPerFunc { - r.Offset += int64(totalSize) - rels = append(rels, r) + relocator.appendFunction(fctx, module, cm, i, fidx, body, relsPerFunc, be.SourceOffsetInfo()) } - - bodies[i] = body - totalSize += len(body) - if wazevoapi.PrintMachineCodeHexPerFunction { - fmt.Printf("[[[machine code for %s]]]\n%s\n\n", wazevoapi.GetCurrentFunctionName(ctx), hex.EncodeToString(body)) + } else { + // Compile with N worker goroutines. + // Collect compiled functions across workers in a slice, + // to be added to the relocator in-order and resolved serially at the end. + // This uses more memory and CPU (across cores), but can be significantly faster. + type compiledFunc struct { + fctx context.Context + fnum int + fidx wasm.Index + body []byte + relsPerFunc []backend.RelocationInfo + offsPerFunc []backend.SourceOffsetInfo + } + + compiledFuncs := make([]compiledFunc, len(module.CodeSection)) + ctx, cancel := context.WithCancelCause(ctx) + defer cancel(nil) + + var count atomic.Uint32 + var wg sync.WaitGroup + wg.Add(workers) + + for range workers { + go func() { + defer wg.Done() + + // Creates new compiler instances which are reused for each function. 
+ machine := newMachine() + ssaBuilder := ssa.NewBuilder() + be := backend.NewCompiler(ctx, machine, ssaBuilder) + fe := frontend.NewFrontendCompiler(module, ssaBuilder, &cm.offsets, ensureTermination, withListener, needSourceInfo) + + for { + if err := ctx.Err(); err != nil { + // Compilation canceled! + return + } + + i := int(count.Add(1)) - 1 + if i >= len(module.CodeSection) { + return + } + + if wazevoapi.DeterministicCompilationVerifierEnabled { + i = indexes[i] + } + + fidx := wasm.Index(i + importedFns) + fctx := functionContext(ctx, module, i, fidx) + + needListener := len(listeners) > i && listeners[i] != nil + body, relsPerFunc, err := e.compileLocalWasmFunction(fctx, module, wasm.Index(i), fe, ssaBuilder, be, needListener) + if err != nil { + cancel(fmt.Errorf("compile function %d/%d: %v", i, len(module.CodeSection)-1, err)) + return + } + + compiledFuncs[i] = compiledFunc{ + fctx, i, fidx, body, + // These slices are internal to the backend compiler and since we are going to buffer them instead + // of process them immediately we need to copy the memory. + slices.Clone(relsPerFunc), + slices.Clone(be.SourceOffsetInfo()), + } + } + }() + } + + wg.Wait() + if err := context.Cause(ctx); err != nil { + return nil, err } - if needCallTrampoline { - // If the total size exceeds the trampoline interval, we need to add a trampoline island. - if totalSize/trampolineInterval > len(callTrampolineIslandOffsets) { - callTrampolineIslandOffsets = append(callTrampolineIslandOffsets, totalSize) - totalSize += callTrampolineIslandSize - } + for i := range compiledFuncs { + fn := &compiledFuncs[i] + relocator.appendFunction(fn.fctx, module, cm, fn.fnum, fn.fidx, fn.body, fn.relsPerFunc, fn.offsPerFunc) } } // Allocate executable memory and then copy the generated machine code. - executable, err := platform.MmapCodeSegment(totalSize) + executable, err := platform.MmapCodeSegment(relocator.totalSize) if err != nil { panic(err) } cm.executable = executable - for i, b := range bodies { + for i, b := range relocator.bodies { offset := cm.functionOffsets[i] copy(executable[offset:], b) } @@ -312,22 +370,108 @@ func (e *engine) compileModule(ctx context.Context, module *wasm.Module, listene } } - // Resolve relocations for local function calls. - if len(rels) > 0 { - machine.ResolveRelocations(refToBinaryOffset, importedFns, executable, rels, callTrampolineIslandOffsets) - } + relocator.resolveRelocations(machine, executable, importedFns) - if runtime.GOARCH == "arm64" { - // On arm64, we cannot give all of rwx at the same time, so we change it to exec. - if err = platform.MprotectRX(executable); err != nil { - return nil, err - } + if err = platform.MprotectRX(executable); err != nil { + return nil, err } cm.sharedFunctions = e.sharedFunctions e.setFinalizer(cm.executables, executablesFinalizer) return cm, nil } +func functionContext(ctx context.Context, module *wasm.Module, fnum int, fidx wasm.Index) context.Context { + if wazevoapi.NeedFunctionNameInContext { + def := module.FunctionDefinition(fidx) + name := def.DebugName() + if len(def.ExportNames()) > 0 { + name = def.ExportNames()[0] + } + ctx = wazevoapi.SetCurrentFunctionName(ctx, fnum, fmt.Sprintf("[%d/%d]%s", fnum, len(module.CodeSection)-1, name)) + } + return ctx +} + +type engineRelocator struct { + bodies [][]byte + refToBinaryOffset []int + rels []backend.RelocationInfo + totalSize int // Total binary size of the executable. 
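The worker loop above distributes functions with an atomic counter: each goroutine owns its own machine, SSA builder, and frontend (so no compiler state is shared), claims the next index with count.Add(1)-1, and the first error cancels the shared context so the remaining workers drain quickly. A self-contained sketch of just that coordination, with hypothetical names and assuming Go 1.22+ for range-over-int:

import (
    "context"
    "sync"
    "sync/atomic"
)

func compileAll(ctx context.Context, n, workers int, compile func(context.Context, int) error) error {
    ctx, cancel := context.WithCancelCause(ctx)
    defer cancel(nil)

    var next atomic.Uint32
    var wg sync.WaitGroup
    wg.Add(workers)
    for range workers {
        go func() {
            defer wg.Done()
            for ctx.Err() == nil {
                i := int(next.Add(1)) - 1 // claim the next function index
                if i >= n {
                    return
                }
                if err := compile(ctx, i); err != nil {
                    cancel(err) // the first failure stops all workers
                    return
                }
            }
        }()
    }
    wg.Wait()
    return context.Cause(ctx)
}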
+ trampolineInterval int + callTrampolineIslandSize int + callTrampolineIslandOffsets []int // Holds the offsets of trampoline islands. +} + +func newEngineRelocator( + machine backend.Machine, + importedFns, localFns int, +) (r engineRelocator, err error) { + // Trampoline relocation related variables. + r.trampolineInterval, r.callTrampolineIslandSize, err = machine.CallTrampolineIslandInfo(localFns) + r.refToBinaryOffset = make([]int, importedFns+localFns) + r.bodies = make([][]byte, 0, localFns) + return +} + +func (r *engineRelocator) resolveRelocations(machine backend.Machine, executable []byte, importedFns int) { + // Resolve relocations for local function calls. + if len(r.rels) > 0 { + machine.ResolveRelocations(r.refToBinaryOffset, importedFns, executable, r.rels, r.callTrampolineIslandOffsets) + } +} + +func (r *engineRelocator) appendFunction( + ctx context.Context, + module *wasm.Module, + cm *compiledModule, + fnum int, fidx wasm.Index, + body []byte, + relsPerFunc []backend.RelocationInfo, + offsPerFunc []backend.SourceOffsetInfo, +) { + // Align 16-bytes boundary. + r.totalSize = (r.totalSize + 15) &^ 15 + cm.functionOffsets[fnum] = r.totalSize + + needSourceInfo := module.DWARFLines != nil + if needSourceInfo { + // At the beginning of the function, we add the offset of the function body so that + // we can resolve the source location of the call site of before listener call. + cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(r.totalSize)) + cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, module.CodeSection[fnum].BodyOffsetInCodeSection) + + for _, info := range offsPerFunc { + cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(r.totalSize)+uintptr(info.ExecutableOffset)) + cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, uint64(info.SourceOffset)) + } + } + + fref := frontend.FunctionIndexToFuncRef(fidx) + r.refToBinaryOffset[fref] = r.totalSize + + // At this point, relocation offsets are relative to the start of the function body, + // so we adjust it to the start of the executable. + r.rels = slices.Grow(r.rels, len(relsPerFunc)) + for _, rel := range relsPerFunc { + rel.Offset += int64(r.totalSize) + r.rels = append(r.rels, rel) + } + + r.totalSize += len(body) + r.bodies = append(r.bodies, body) + if wazevoapi.PrintMachineCodeHexPerFunction { + fmt.Printf("[[[machine code for %s]]]\n%s\n\n", wazevoapi.GetCurrentFunctionName(ctx), hex.EncodeToString(body)) + } + + if r.callTrampolineIslandSize > 0 { + // If the total size exceeds the trampoline interval, we need to add a trampoline island. + if r.totalSize/r.trampolineInterval > len(r.callTrampolineIslandOffsets) { + r.callTrampolineIslandOffsets = append(r.callTrampolineIslandOffsets, r.totalSize) + r.totalSize += r.callTrampolineIslandSize + } + } +} + func (e *engine) compileLocalWasmFunction( ctx context.Context, module *wasm.Module, @@ -374,9 +518,7 @@ func (e *engine) compileLocalWasmFunction( } // TODO: optimize as zero copy. - copied := make([]byte, len(original)) - copy(copied, original) - return copied, rels, nil + return slices.Clone(original), rels, nil } func (e *engine) compileHostModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener) (*compiledModule, error) { @@ -448,9 +590,7 @@ func (e *engine) compileHostModule(ctx context.Context, module *wasm.Module, lis } // TODO: optimize as zero copy. 
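appendFunction rounds the running offset up to a 16-byte boundary with (r.totalSize + 15) &^ 15 before recording where the next body starts, and the trampoline builders further down compute the complementary padding with 15 & -len(buf). Both are the usual power-of-two alignment identities; a small worked example, purely illustrative:

package main

import "fmt"

// alignUp rounds n up to the next multiple of 16, as appendFunction does for
// function start offsets: (n + 15) &^ 15 clears the low four bits after adding 15.
func alignUp(n int) int { return (n + 15) &^ 15 }

// padding returns how many bytes must be appended to a buffer of length n to
// reach a 16-byte boundary; 15 & -n equals alignUp(n) - n.
func padding(n int) int { return 15 & -n }

func main() {
	for _, n := range []int{0, 1, 15, 16, 17, 100} {
		fmt.Printf("n=%3d  alignUp=%3d  padding=%2d\n", n, alignUp(n), padding(n))
	}
	// n=  0  alignUp=  0  padding= 0
	// n=  1  alignUp= 16  padding=15
	// n= 17  alignUp= 32  padding=15
}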
- copied := make([]byte, len(body)) - copy(copied, body) - bodies[i] = copied + bodies[i] = slices.Clone(body) totalSize += len(body) } @@ -475,11 +615,8 @@ func (e *engine) compileHostModule(ctx context.Context, module *wasm.Module, lis wazevoapi.PerfMap.Flush(uintptr(unsafe.Pointer(&executable[0])), cm.functionOffsets) } - if runtime.GOARCH == "arm64" { - // On arm64, we cannot give all of rwx at the same time, so we change it to exec. - if err = platform.MprotectRX(executable); err != nil { - return nil, err - } + if err = platform.MprotectRX(executable); err != nil { + return nil, err } e.setFinalizer(cm.executables, executablesFinalizer) return cm, nil @@ -507,12 +644,17 @@ func (e *engine) DeleteCompiledModule(m *wasm.Module) { e.mux.Lock() defer e.mux.Unlock() cm, ok := e.compiledModules[m.ID] - if ok { - if len(cm.executable) > 0 { - e.deleteCompiledModuleFromSortedList(cm) - } - delete(e.compiledModules, m.ID) + if !ok { + return } + cm.refCount-- + if cm.refCount > 0 { + return + } + if len(cm.executable) > 0 { + e.deleteCompiledModuleFromSortedList(cm.compiledModule) + } + delete(e.compiledModules, m.ID) } func (e *engine) addCompiledModuleToSortedList(cm *compiledModule) { @@ -569,7 +711,7 @@ func (e *engine) NewModuleEngine(m *wasm.Module, mi *wasm.ModuleInstance) (wasm. // Note: imported functions are resolved in moduleEngine.ResolveImportedFunction. me.importedFunctions = make([]importedFunction, m.ImportFunctionCount) - compiled, ok := e.getCompiledModuleFromMemory(m) + compiled, ok := e.getCompiledModuleFromMemory(m, false) if !ok { return nil, errors.New("source module must be compiled before instantiation") } @@ -591,167 +733,123 @@ func (e *engine) NewModuleEngine(m *wasm.Module, mi *wasm.ModuleInstance) (wasm. } func (e *engine) compileSharedFunctions() { - e.sharedFunctions = &sharedFunctions{ - listenerBeforeTrampolines: make(map[*wasm.FunctionType][]byte), - listenerAfterTrampolines: make(map[*wasm.FunctionType][]byte), + var sizes [8]int + var trampolines []byte + + addTrampoline := func(i int, buf []byte) { + trampolines = append(trampolines, buf...) + align := 15 & -len(trampolines) // Align 16-bytes boundary. + trampolines = append(trampolines, make([]byte, align)...) 
+ sizes[i] = len(buf) + align } e.be.Init() - { - src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeGrowMemory, &ssa.Signature{ + addTrampoline(0, + e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeGrowMemory, &ssa.Signature{ Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32}, Results: []ssa.Type{ssa.TypeI32}, - }, false) - e.sharedFunctions.memoryGrowExecutable = mmapExecutable(src) - if wazevoapi.PerfMapEnabled { - exe := e.sharedFunctions.memoryGrowExecutable - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_grow_trampoline") - } - } + }, false)) e.be.Init() - { - src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeTableGrow, &ssa.Signature{ + addTrampoline(1, + e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeTableGrow, &ssa.Signature{ Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* table index */, ssa.TypeI32 /* num */, ssa.TypeI64 /* ref */}, Results: []ssa.Type{ssa.TypeI32}, - }, false) - e.sharedFunctions.tableGrowExecutable = mmapExecutable(src) - if wazevoapi.PerfMapEnabled { - exe := e.sharedFunctions.tableGrowExecutable - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "table_grow_trampoline") - } - } + }, false)) e.be.Init() - { - src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCheckModuleExitCode, &ssa.Signature{ + addTrampoline(2, + e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCheckModuleExitCode, &ssa.Signature{ Params: []ssa.Type{ssa.TypeI32 /* exec context */}, Results: []ssa.Type{ssa.TypeI32}, - }, false) - e.sharedFunctions.checkModuleExitCode = mmapExecutable(src) - if wazevoapi.PerfMapEnabled { - exe := e.sharedFunctions.checkModuleExitCode - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "check_module_exit_code_trampoline") - } - } + }, false)) e.be.Init() - { - src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeRefFunc, &ssa.Signature{ + addTrampoline(3, + e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeRefFunc, &ssa.Signature{ Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* function index */}, Results: []ssa.Type{ssa.TypeI64}, // returns the function reference. - }, false) - e.sharedFunctions.refFuncExecutable = mmapExecutable(src) - if wazevoapi.PerfMapEnabled { - exe := e.sharedFunctions.refFuncExecutable - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "ref_func_trampoline") - } - } + }, false)) e.be.Init() - { - src := e.machine.CompileStackGrowCallSequence() - e.sharedFunctions.stackGrowExecutable = mmapExecutable(src) - if wazevoapi.PerfMapEnabled { - exe := e.sharedFunctions.stackGrowExecutable - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "stack_grow_trampoline") - } - } + addTrampoline(4, e.machine.CompileStackGrowCallSequence()) e.be.Init() - { - src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait32, &ssa.Signature{ + addTrampoline(5, + e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait32, &ssa.Signature{ // exec context, timeout, expected, addr Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI32, ssa.TypeI64}, // Returns the status. 
Results: []ssa.Type{ssa.TypeI32}, - }, false) - e.sharedFunctions.memoryWait32Executable = mmapExecutable(src) - if wazevoapi.PerfMapEnabled { - exe := e.sharedFunctions.memoryWait32Executable - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_wait32_trampoline") - } - } + }, false)) e.be.Init() - { - src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait64, &ssa.Signature{ + addTrampoline(6, + e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait64, &ssa.Signature{ // exec context, timeout, expected, addr Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64, ssa.TypeI64}, // Returns the status. Results: []ssa.Type{ssa.TypeI32}, - }, false) - e.sharedFunctions.memoryWait64Executable = mmapExecutable(src) - if wazevoapi.PerfMapEnabled { - exe := e.sharedFunctions.memoryWait64Executable - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_wait64_trampoline") - } - } + }, false)) e.be.Init() - { - src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryNotify, &ssa.Signature{ + addTrampoline(7, + e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryNotify, &ssa.Signature{ // exec context, count, addr Params: []ssa.Type{ssa.TypeI64, ssa.TypeI32, ssa.TypeI64}, // Returns the number notified. Results: []ssa.Type{ssa.TypeI32}, - }, false) - e.sharedFunctions.memoryNotifyExecutable = mmapExecutable(src) - if wazevoapi.PerfMapEnabled { - exe := e.sharedFunctions.memoryNotifyExecutable - wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_notify_trampoline") - } + }, false)) + + fns := &sharedFunctions{ + executable: mmapExecutable(trampolines), + listenerTrampolines: make(listenerTrampolines), + } + e.setFinalizer(fns, sharedFunctionsFinalizer) + + offset := 0 + fns.memoryGrowAddress = &fns.executable[offset] + offset += sizes[0] + fns.tableGrowAddress = &fns.executable[offset] + offset += sizes[1] + fns.checkModuleExitCodeAddress = &fns.executable[offset] + offset += sizes[2] + fns.refFuncAddress = &fns.executable[offset] + offset += sizes[3] + fns.stackGrowAddress = &fns.executable[offset] + offset += sizes[4] + fns.memoryWait32Address = &fns.executable[offset] + offset += sizes[5] + fns.memoryWait64Address = &fns.executable[offset] + offset += sizes[6] + fns.memoryNotifyAddress = &fns.executable[offset] + + if wazevoapi.PerfMapEnabled { + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.memoryGrowAddress)), uint64(sizes[0]), "memory_grow_trampoline") + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.tableGrowAddress)), uint64(sizes[1]), "table_grow_trampoline") + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.checkModuleExitCodeAddress)), uint64(sizes[2]), "check_module_exit_code_trampoline") + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.refFuncAddress)), uint64(sizes[3]), "ref_func_trampoline") + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.stackGrowAddress)), uint64(sizes[4]), "stack_grow_trampoline") + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.memoryWait32Address)), uint64(sizes[5]), "memory_wait32_trampoline") + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.memoryWait64Address)), uint64(sizes[6]), "memory_wait64_trampoline") + wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.memoryNotifyAddress)), uint64(sizes[7]), "memory_notify_trampoline") } - e.setFinalizer(e.sharedFunctions, sharedFunctionsFinalizer) + e.sharedFunctions = fns } func sharedFunctionsFinalizer(sf 
*sharedFunctions) { - if err := platform.MunmapCodeSegment(sf.memoryGrowExecutable); err != nil { - panic(err) - } - if err := platform.MunmapCodeSegment(sf.checkModuleExitCode); err != nil { - panic(err) - } - if err := platform.MunmapCodeSegment(sf.stackGrowExecutable); err != nil { - panic(err) - } - if err := platform.MunmapCodeSegment(sf.tableGrowExecutable); err != nil { - panic(err) - } - if err := platform.MunmapCodeSegment(sf.refFuncExecutable); err != nil { + if err := platform.MunmapCodeSegment(sf.executable); err != nil { panic(err) } - if err := platform.MunmapCodeSegment(sf.memoryWait32Executable); err != nil { - panic(err) - } - if err := platform.MunmapCodeSegment(sf.memoryWait64Executable); err != nil { - panic(err) - } - if err := platform.MunmapCodeSegment(sf.memoryNotifyExecutable); err != nil { - panic(err) - } - for _, f := range sf.listenerBeforeTrampolines { - if err := platform.MunmapCodeSegment(f); err != nil { - panic(err) - } - } - for _, f := range sf.listenerAfterTrampolines { - if err := platform.MunmapCodeSegment(f); err != nil { + for _, f := range sf.listenerTrampolines { + if err := platform.MunmapCodeSegment(f.executable); err != nil { panic(err) } } - sf.memoryGrowExecutable = nil - sf.checkModuleExitCode = nil - sf.stackGrowExecutable = nil - sf.tableGrowExecutable = nil - sf.refFuncExecutable = nil - sf.memoryWait32Executable = nil - sf.memoryWait64Executable = nil - sf.memoryNotifyExecutable = nil - sf.listenerBeforeTrampolines = nil - sf.listenerAfterTrampolines = nil + sf.executable = nil + sf.listenerTrampolines = nil } func executablesFinalizer(exec *executables) { @@ -762,12 +860,13 @@ func executablesFinalizer(exec *executables) { } exec.executable = nil - for _, f := range exec.entryPreambles { - if err := platform.MunmapCodeSegment(f); err != nil { + if len(exec.entryPreambles) > 0 { + if err := platform.MunmapCodeSegment(exec.entryPreambles); err != nil { panic(err) } } exec.entryPreambles = nil + exec.entryPreamblesPtrs = nil } func mmapExecutable(src []byte) []byte { @@ -778,11 +877,8 @@ func mmapExecutable(src []byte) []byte { copy(executable, src) - if runtime.GOARCH == "arm64" { - // On arm64, we cannot give all of rwx at the same time, so we change it to exec. - if err = platform.MprotectRX(executable); err != nil { - panic(err) - } + if err = platform.MprotectRX(executable); err != nil { + panic(err) } return executable } @@ -804,25 +900,30 @@ func (e *engine) getListenerTrampolineForType(functionType *wasm.FunctionType) ( e.mux.Lock() defer e.mux.Unlock() - beforeBuf, ok := e.sharedFunctions.listenerBeforeTrampolines[functionType] - afterBuf := e.sharedFunctions.listenerAfterTrampolines[functionType] - if ok { - return &beforeBuf[0], &afterBuf[0] - } + trampoline, ok := e.sharedFunctions.listenerTrampolines[functionType] + if !ok { + var executable []byte + beforeSig, afterSig := frontend.SignatureForListener(functionType) - beforeSig, afterSig := frontend.SignatureForListener(functionType) + e.be.Init() + buf := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerBefore, beforeSig, false) + executable = append(executable, buf...) - e.be.Init() - buf := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerBefore, beforeSig, false) - beforeBuf = mmapExecutable(buf) + align := 15 & -len(executable) // Align 16-bytes boundary. + executable = append(executable, make([]byte, align)...) 
+ offset := len(executable) - e.be.Init() - buf = e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerAfter, afterSig, false) - afterBuf = mmapExecutable(buf) + e.be.Init() + buf = e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerAfter, afterSig, false) + executable = append(executable, buf...) - e.sharedFunctions.listenerBeforeTrampolines[functionType] = beforeBuf - e.sharedFunctions.listenerAfterTrampolines[functionType] = afterBuf - return &beforeBuf[0], &afterBuf[0] + trampoline.executable = mmapExecutable(executable) + trampoline.before = &trampoline.executable[0] + trampoline.after = &trampoline.executable[offset] + + e.sharedFunctions.listenerTrampolines[functionType] = trampoline + } + return trampoline.before, trampoline.after } func (cm *compiledModule) getSourceOffset(pc uintptr) uint64 { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go index e49353dc8..e0446e08a 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go @@ -8,7 +8,6 @@ import ( "fmt" "hash/crc32" "io" - "runtime" "unsafe" "github.com/tetratelabs/wazero/experimental" @@ -33,7 +32,7 @@ func fileCacheKey(m *wasm.Module) (ret filecache.Key) { s.Write(magic) // Write the CPU features so that we can cache the compiled module for the same CPU. // This prevents the incompatible CPU features from being used. - cpu := platform.CpuFeatures.Raw() + cpu := platform.CpuFeatures().Raw() // Reuse the `ret` buffer to write the first 8 bytes of the CPU features so that we can avoid the allocation. binary.LittleEndian.PutUint64(ret[:8], cpu) s.Write(ret[:8]) @@ -51,7 +50,7 @@ func (e *engine) addCompiledModule(module *wasm.Module, cm *compiledModule) (err } func (e *engine) getCompiledModule(module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (cm *compiledModule, ok bool, err error) { - cm, ok = e.getCompiledModuleFromMemory(module) + cm, ok = e.getCompiledModuleFromMemory(module, true) if ok { return } @@ -88,16 +87,23 @@ func (e *engine) getCompiledModule(module *wasm.Module, listeners []experimental func (e *engine) addCompiledModuleToMemory(m *wasm.Module, cm *compiledModule) { e.mux.Lock() defer e.mux.Unlock() - e.compiledModules[m.ID] = cm + e.compiledModules[m.ID] = &compiledModuleWithCount{compiledModule: cm, refCount: 1} if len(cm.executable) > 0 { e.addCompiledModuleToSortedList(cm) } } -func (e *engine) getCompiledModuleFromMemory(module *wasm.Module) (cm *compiledModule, ok bool) { - e.mux.RLock() - defer e.mux.RUnlock() - cm, ok = e.compiledModules[module.ID] +func (e *engine) getCompiledModuleFromMemory(module *wasm.Module, increaseRefCount bool) (cm *compiledModule, ok bool) { + e.mux.Lock() + defer e.mux.Unlock() + + cmWithCount, ok := e.compiledModules[module.ID] + if ok { + cm = cmWithCount.compiledModule + if increaseRefCount { + cmWithCount.refCount++ + } + } return } @@ -246,11 +252,8 @@ func deserializeCompiledModule(wazeroVersion string, reader io.ReadCloser) (cm * return nil, false, fmt.Errorf("compilationcache: checksum mismatch (expected %d, got %d)", expected, checksum) } - if runtime.GOARCH == "arm64" { - // On arm64, we cannot give all of rwx at the same time, so we change it to exec. 
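getCompiledModuleFromMemory and DeleteCompiledModule now bracket a reference count on each cache entry: a hit with increaseRefCount bumps it, and deletion only evicts once the count reaches zero, all under the engine mutex. A stripped-down sketch of that refcounted-cache shape with generic placeholder types, not wazero's actual structs:

package main

import (
	"fmt"
	"sync"
)

type entry[V any] struct {
	value    V
	refCount int
}

// refCache is a mutex-guarded cache whose entries are evicted only after
// every Get(..., acquire=true) has been balanced by a Release.
type refCache[K comparable, V any] struct {
	mu sync.Mutex
	m  map[K]*entry[V]
}

func newRefCache[K comparable, V any]() *refCache[K, V] {
	return &refCache[K, V]{m: map[K]*entry[V]{}}
}

func (c *refCache[K, V]) Add(k K, v V) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.m[k] = &entry[V]{value: v, refCount: 1}
}

func (c *refCache[K, V]) Get(k K, acquire bool) (v V, ok bool) {
	c.mu.Lock()
	defer c.mu.Unlock()
	e, ok := c.m[k]
	if !ok {
		return v, false
	}
	if acquire {
		e.refCount++
	}
	return e.value, true
}

func (c *refCache[K, V]) Release(k K) {
	c.mu.Lock()
	defer c.mu.Unlock()
	e, ok := c.m[k]
	if !ok {
		return
	}
	if e.refCount--; e.refCount > 0 {
		return
	}
	delete(c.m, k) // last reference gone: evict
}

func main() {
	c := newRefCache[string, int]()
	c.Add("m", 42)
	c.Get("m", true) // second owner
	c.Release("m")
	c.Release("m")
	_, ok := c.Get("m", false)
	fmt.Println(ok) // false
}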
- if err = platform.MprotectRX(executable); err != nil { - return nil, false, err - } + if err = platform.MprotectRX(executable); err != nil { + return nil, false, err } cm.executable = executable } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go index eebdba034..5749e03c7 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go @@ -469,7 +469,7 @@ func (c *Compiler) allocateVarLengthValues(_cap int, vs ...ssa.Value) ssa.Values builder := c.ssaBuilder pool := builder.VarLengthPool() args := pool.Allocate(_cap) - args = args.Append(builder.VarLengthPool(), vs...) + args = args.Append(pool, vs...) return args } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go index e73debbd1..1277db0bf 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go @@ -123,8 +123,7 @@ func (c *Compiler) nPeekDup(n int) ssa.Values { l := c.state() tail := len(l.values) - args := c.allocateVarLengthValues(n) - args = args.Append(c.ssaBuilder.VarLengthPool(), l.values[tail-n:tail]...) + args := c.allocateVarLengthValues(n, l.values[tail-n:tail]...) return args } @@ -665,19 +664,22 @@ func (c *Compiler) lowerCurrentOpcode() { tableBaseAddr := c.loadTableBaseAddr(tableInstancePtr) addr := builder.AllocateInstruction().AsIadd(tableBaseAddr, offsetInBytes).Insert(builder).Return() - // Prepare the loop and following block. - beforeLoop := builder.AllocateBasicBlock() - loopBlk := builder.AllocateBasicBlock() - loopVar := loopBlk.AddParam(builder, ssa.TypeI64) - followingBlk := builder.AllocateBasicBlock() - // Uses the copy trick for faster filling buffer like memory.fill, but in this case we copy 8 bytes at a time. + // Tables are rarely huge, so ignore the 8KB maximum. + // https://github.com/golang/go/blob/go1.24.0/src/slices/slices.go#L514-L517 + // // buf := memoryInst.Buffer[offset : offset+fillSize] // buf[0:8] = value // for i := 8; i < fillSize; i *= 2 { Begin with 8 bytes. // copy(buf[i:], buf[:i]) // } + // Prepare the loop and following block. + beforeLoop := builder.AllocateBasicBlock() + loopBlk := builder.AllocateBasicBlock() + loopVar := loopBlk.AddParam(builder, ssa.TypeI64) + followingBlk := builder.AllocateBasicBlock() + // Insert the jump to the beforeLoop block; If the fillSize is zero, then jump to the following block to skip entire logics. zero := builder.AllocateInstruction().AsIconst64(0).Insert(builder).Return() ifFillSizeZero := builder.AllocateInstruction().AsIcmp(fillSizeExt, zero, ssa.IntegerCmpCondEqual). 
@@ -688,32 +690,24 @@ func (c *Compiler) lowerCurrentOpcode() { // buf[0:8] = value builder.SetCurrentBlock(beforeLoop) builder.AllocateInstruction().AsStore(ssa.OpcodeStore, value, addr, 0).Insert(builder) - initValue := builder.AllocateInstruction().AsIconst64(8).Insert(builder).Return() - c.insertJumpToBlock(c.allocateVarLengthValues(1, initValue), loopBlk) + eight := builder.AllocateInstruction().AsIconst64(8).Insert(builder).Return() + c.insertJumpToBlock(c.allocateVarLengthValues(1, eight), loopBlk) builder.SetCurrentBlock(loopBlk) dstAddr := builder.AllocateInstruction().AsIadd(addr, loopVar).Insert(builder).Return() - // If loopVar*2 > fillSizeInBytes, then count must be fillSizeInBytes-loopVar. - var count ssa.Value - { - loopVarDoubled := builder.AllocateInstruction().AsIadd(loopVar, loopVar).Insert(builder).Return() - loopVarDoubledLargerThanFillSize := builder. - AllocateInstruction().AsIcmp(loopVarDoubled, fillSizeInBytes, ssa.IntegerCmpCondUnsignedGreaterThanOrEqual). - Insert(builder).Return() - diff := builder.AllocateInstruction().AsIsub(fillSizeInBytes, loopVar).Insert(builder).Return() - count = builder.AllocateInstruction().AsSelect(loopVarDoubledLargerThanFillSize, diff, loopVar).Insert(builder).Return() - } + newLoopVar := builder.AllocateInstruction().AsIadd(loopVar, loopVar).Insert(builder).Return() + newLoopVarLessThanFillSize := builder.AllocateInstruction(). + AsIcmp(newLoopVar, fillSizeInBytes, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return() - c.callMemmove(dstAddr, addr, count) + // On the last iteration, count must be fillSizeInBytes-loopVar. + diff := builder.AllocateInstruction().AsIsub(fillSizeInBytes, loopVar).Insert(builder).Return() + count := builder.AllocateInstruction().AsSelect(newLoopVarLessThanFillSize, loopVar, diff).Insert(builder).Return() - shiftAmount := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return() - newLoopVar := builder.AllocateInstruction().AsIshl(loopVar, shiftAmount).Insert(builder).Return() - loopVarLessThanFillSize := builder.AllocateInstruction(). - AsIcmp(newLoopVar, fillSizeInBytes, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return() + c.callMemmove(dstAddr, addr, count) builder.AllocateInstruction(). - AsBrnz(loopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk). + AsBrnz(newLoopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk). Insert(builder) c.insertJumpToBlock(ssa.ValuesNil, followingBlk) @@ -741,11 +735,15 @@ func (c *Compiler) lowerCurrentOpcode() { // Calculate the base address: addr := builder.AllocateInstruction().AsIadd(c.getMemoryBaseValue(false), offset).Insert(builder).Return() - // Uses the copy trick for faster filling buffer: https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d + // Uses the copy trick for faster filling buffer, with a maximum chunk size of 8KB. + // https://github.com/golang/go/blob/go1.24.0/src/bytes/bytes.go#L664-L673 + // // buf := memoryInst.Buffer[offset : offset+fillSize] // buf[0] = value - // for i := 1; i < fillSize; i *= 2 { - // copy(buf[i:], buf[:i]) + // for i := 1; i < fillSize; { + // chunk := ((i - 1) & 8191) + 1 + // copy(buf[i:], buf[:chunk]) + // i += chunk // } // Prepare the loop and following block. 
@@ -764,32 +762,31 @@ func (c *Compiler) lowerCurrentOpcode() { // buf[0] = value builder.SetCurrentBlock(beforeLoop) builder.AllocateInstruction().AsStore(ssa.OpcodeIstore8, value, addr, 0).Insert(builder) - initValue := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return() - c.insertJumpToBlock(c.allocateVarLengthValues(1, initValue), loopBlk) + one := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return() + c.insertJumpToBlock(c.allocateVarLengthValues(1, one), loopBlk) builder.SetCurrentBlock(loopBlk) dstAddr := builder.AllocateInstruction().AsIadd(addr, loopVar).Insert(builder).Return() - // If loopVar*2 > fillSizeExt, then count must be fillSizeExt-loopVar. - var count ssa.Value - { - loopVarDoubled := builder.AllocateInstruction().AsIadd(loopVar, loopVar).Insert(builder).Return() - loopVarDoubledLargerThanFillSize := builder. - AllocateInstruction().AsIcmp(loopVarDoubled, fillSize, ssa.IntegerCmpCondUnsignedGreaterThanOrEqual). - Insert(builder).Return() - diff := builder.AllocateInstruction().AsIsub(fillSize, loopVar).Insert(builder).Return() - count = builder.AllocateInstruction().AsSelect(loopVarDoubledLargerThanFillSize, diff, loopVar).Insert(builder).Return() - } - - c.callMemmove(dstAddr, addr, count) + // chunk := ((i - 1) & 8191) + 1 + mask := builder.AllocateInstruction().AsIconst64(8191).Insert(builder).Return() + tmp1 := builder.AllocateInstruction().AsIsub(loopVar, one).Insert(builder).Return() + tmp2 := builder.AllocateInstruction().AsBand(tmp1, mask).Insert(builder).Return() + chunk := builder.AllocateInstruction().AsIadd(tmp2, one).Insert(builder).Return() - shiftAmount := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return() - newLoopVar := builder.AllocateInstruction().AsIshl(loopVar, shiftAmount).Insert(builder).Return() - loopVarLessThanFillSize := builder.AllocateInstruction(). + // i += chunk + newLoopVar := builder.AllocateInstruction().AsIadd(loopVar, chunk).Insert(builder).Return() + newLoopVarLessThanFillSize := builder.AllocateInstruction(). AsIcmp(newLoopVar, fillSize, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return() + // count = min(chunk, fillSize-loopVar) + diff := builder.AllocateInstruction().AsIsub(fillSize, loopVar).Insert(builder).Return() + count := builder.AllocateInstruction().AsSelect(newLoopVarLessThanFillSize, chunk, diff).Insert(builder).Return() + + c.callMemmove(dstAddr, addr, count) + builder.AllocateInstruction(). - AsBrnz(loopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk). + AsBrnz(newLoopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk). Insert(builder) c.insertJumpToBlock(ssa.ValuesNil, followingBlk) @@ -1173,7 +1170,7 @@ func (c *Compiler) lowerCurrentOpcode() { ssa.TypeI64, ).Insert(builder).Return() - args := c.allocateVarLengthValues(1, c.execCtxPtrValue, pages) + args := c.allocateVarLengthValues(2, c.execCtxPtrValue, pages) callGrowRet := builder. AllocateInstruction(). AsCallIndirect(memoryGrowPtr, &c.memoryGrowSig, args). @@ -1343,8 +1340,7 @@ func (c *Compiler) lowerCurrentOpcode() { blockType: bt, }) - args := c.allocateVarLengthValues(originalLen) - args = args.Append(builder.VarLengthPool(), state.values[originalLen:]...) + args := c.allocateVarLengthValues(len(bt.Params), state.values[originalLen:]...) // Insert the jump to the header of loop. br := builder.AllocateInstruction() @@ -1383,8 +1379,7 @@ func (c *Compiler) lowerCurrentOpcode() { // multiple definitions (one in Then and another in Else blocks). 
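The new memory.fill lowering mirrors the bytes-package loop referenced above: store one byte, then repeatedly copy the already-filled prefix forward, capping each copy at an 8 KiB chunk instead of doubling without bound. In plain Go, the loop that the SSA blocks implement looks roughly like this (a sketch, not the generated code):

package main

import "fmt"

// fill sets every byte of buf to v using doubling copies capped at 8 KiB,
// mirroring the loop sketched in the lowering's comment (illustrative only).
func fill(buf []byte, v byte) {
	if len(buf) == 0 {
		return
	}
	buf[0] = v
	for i := 1; i < len(buf); {
		chunk := ((i - 1) & 8191) + 1 // 1, 2, 4, ..., 8192, then 8192 thereafter
		copy(buf[i:], buf[:chunk])    // the final copy is naturally truncated at len(buf)
		i += chunk
	}
}

func main() {
	b := make([]byte, 20000)
	fill(b, 0xAA)
	fmt.Println(b[0], b[9999], b[19999]) // 170 170 170
}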
c.addBlockParamsFromWasmTypes(bt.Results, followingBlk) - args := c.allocateVarLengthValues(len(bt.Params)) - args = args.Append(builder.VarLengthPool(), state.values[len(state.values)-len(bt.Params):]...) + args := c.allocateVarLengthValues(len(bt.Params), state.values[len(state.values)-len(bt.Params):]...) // Insert the conditional jump to the Else block. brz := builder.AllocateInstruction() @@ -1568,11 +1563,7 @@ func (c *Compiler) lowerCurrentOpcode() { c.callListenerAfter() } - results := c.nPeekDup(c.results()) - instr := builder.AllocateInstruction() - - instr.AsReturn(results) - builder.InsertInstruction(instr) + c.lowerReturn(builder) state.unreachable = true case wasm.OpcodeUnreachable: @@ -1597,66 +1588,7 @@ func (c *Compiler) lowerCurrentOpcode() { if state.unreachable { break } - - var typIndex wasm.Index - if fnIndex < c.m.ImportFunctionCount { - // Before transfer the control to the callee, we have to store the current module's moduleContextPtr - // into execContext.callerModuleContextPtr in case when the callee is a Go function. - c.storeCallerModuleContext() - var fi int - for i := range c.m.ImportSection { - imp := &c.m.ImportSection[i] - if imp.Type == wasm.ExternTypeFunc { - if fi == int(fnIndex) { - typIndex = imp.DescFunc - break - } - fi++ - } - } - } else { - typIndex = c.m.FunctionSection[fnIndex-c.m.ImportFunctionCount] - } - typ := &c.m.TypeSection[typIndex] - - argN := len(typ.Params) - tail := len(state.values) - argN - vs := state.values[tail:] - state.values = state.values[:tail] - args := c.allocateVarLengthValues(2+len(vs), c.execCtxPtrValue) - - sig := c.signatures[typ] - call := builder.AllocateInstruction() - if fnIndex >= c.m.ImportFunctionCount { - args = args.Append(builder.VarLengthPool(), c.moduleCtxPtrValue) // This case the callee module is itself. - args = args.Append(builder.VarLengthPool(), vs...) - call.AsCall(FunctionIndexToFuncRef(fnIndex), sig, args) - builder.InsertInstruction(call) - } else { - // This case we have to read the address of the imported function from the module context. - moduleCtx := c.moduleCtxPtrValue - loadFuncPtr, loadModuleCtxPtr := builder.AllocateInstruction(), builder.AllocateInstruction() - funcPtrOffset, moduleCtxPtrOffset, _ := c.offset.ImportedFunctionOffset(fnIndex) - loadFuncPtr.AsLoad(moduleCtx, funcPtrOffset.U32(), ssa.TypeI64) - loadModuleCtxPtr.AsLoad(moduleCtx, moduleCtxPtrOffset.U32(), ssa.TypeI64) - builder.InsertInstruction(loadFuncPtr) - builder.InsertInstruction(loadModuleCtxPtr) - - args = args.Append(builder.VarLengthPool(), loadModuleCtxPtr.Return()) - args = args.Append(builder.VarLengthPool(), vs...) - call.AsCallIndirect(loadFuncPtr.Return(), sig, args) - builder.InsertInstruction(call) - } - - first, rest := call.Returns() - if first.Valid() { - state.push(first) - } - for _, v := range rest { - state.push(v) - } - - c.reloadAfterCall() + c.lowerCall(fnIndex) case wasm.OpcodeDrop: if state.unreachable { @@ -3190,7 +3122,7 @@ func (c *Compiler) lowerCurrentOpcode() { ssa.TypeI64, ).Insert(builder).Return() - args := c.allocateVarLengthValues(3, c.execCtxPtrValue, timeout, exp, addr) + args := c.allocateVarLengthValues(4, c.execCtxPtrValue, timeout, exp, addr) memoryWaitRet := builder.AllocateInstruction(). AsCallIndirect(memoryWaitPtr, sig, args). 
Insert(builder).Return() @@ -3211,7 +3143,7 @@ func (c *Compiler) lowerCurrentOpcode() { wazevoapi.ExecutionContextOffsetMemoryNotifyTrampolineAddress.U32(), ssa.TypeI64, ).Insert(builder).Return() - args := c.allocateVarLengthValues(2, c.execCtxPtrValue, count, addr) + args := c.allocateVarLengthValues(3, c.execCtxPtrValue, count, addr) memoryNotifyRet := builder.AllocateInstruction(). AsCallIndirect(memoryNotifyPtr, &c.memoryNotifySig, args). Insert(builder).Return() @@ -3460,6 +3392,25 @@ func (c *Compiler) lowerCurrentOpcode() { elementAddr := c.lowerAccessTableWithBoundsCheck(tableIndex, targetOffsetInTable) loaded := builder.AllocateInstruction().AsLoad(elementAddr, 0, ssa.TypeI64).Insert(builder).Return() state.push(loaded) + + case wasm.OpcodeTailCallReturnCallIndirect: + typeIndex := c.readI32u() + tableIndex := c.readI32u() + if state.unreachable { + break + } + _, _ = typeIndex, tableIndex + c.lowerTailCallReturnCallIndirect(typeIndex, tableIndex) + state.unreachable = true + + case wasm.OpcodeTailCallReturnCall: + fnIndex := c.readI32u() + if state.unreachable { + break + } + c.lowerTailCallReturnCall(fnIndex) + state.unreachable = true + default: panic("TODO: unsupported in wazevo yet: " + wasm.InstructionName(op)) } @@ -3473,6 +3424,14 @@ func (c *Compiler) lowerCurrentOpcode() { c.loweringState.pc++ } +func (c *Compiler) lowerReturn(builder ssa.Builder) { + results := c.nPeekDup(c.results()) + instr := builder.AllocateInstruction() + + instr.AsReturn(results) + builder.InsertInstruction(instr) +} + func (c *Compiler) lowerExtMul(v1, v2 ssa.Value, from, to ssa.VecLane, signed, low bool) ssa.Value { // TODO: The sequence `Widen; Widen; VIMul` can be substituted for a single instruction on some ISAs. builder := c.ssaBuilder @@ -3533,7 +3492,83 @@ func (c *Compiler) lowerAccessTableWithBoundsCheck(tableIndex uint32, elementOff return calcElementAddressInTable.Return() } -func (c *Compiler) lowerCallIndirect(typeIndex, tableIndex uint32) { +func (c *Compiler) prepareCall(fnIndex uint32) (isIndirect bool, sig *ssa.Signature, args ssa.Values, funcRefOrPtrValue uint64) { + builder := c.ssaBuilder + state := c.state() + var typIndex wasm.Index + if fnIndex < c.m.ImportFunctionCount { + // Before transfer the control to the callee, we have to store the current module's moduleContextPtr + // into execContext.callerModuleContextPtr in case when the callee is a Go function. + c.storeCallerModuleContext() + var fi int + for i := range c.m.ImportSection { + imp := &c.m.ImportSection[i] + if imp.Type == wasm.ExternTypeFunc { + if fi == int(fnIndex) { + typIndex = imp.DescFunc + break + } + fi++ + } + } + } else { + typIndex = c.m.FunctionSection[fnIndex-c.m.ImportFunctionCount] + } + typ := &c.m.TypeSection[typIndex] + + argN := len(typ.Params) + tail := len(state.values) - argN + vs := state.values[tail:] + state.values = state.values[:tail] + args = c.allocateVarLengthValues(2+len(vs), c.execCtxPtrValue) + + sig = c.signatures[typ] + if fnIndex >= c.m.ImportFunctionCount { + args = args.Append(builder.VarLengthPool(), c.moduleCtxPtrValue) // This case the callee module is itself. + args = args.Append(builder.VarLengthPool(), vs...) + return false, sig, args, uint64(FunctionIndexToFuncRef(fnIndex)) + } else { + // This case we have to read the address of the imported function from the module context. 
+ moduleCtx := c.moduleCtxPtrValue + loadFuncPtr, loadModuleCtxPtr := builder.AllocateInstruction(), builder.AllocateInstruction() + funcPtrOffset, moduleCtxPtrOffset, _ := c.offset.ImportedFunctionOffset(fnIndex) + loadFuncPtr.AsLoad(moduleCtx, funcPtrOffset.U32(), ssa.TypeI64) + loadModuleCtxPtr.AsLoad(moduleCtx, moduleCtxPtrOffset.U32(), ssa.TypeI64) + builder.InsertInstruction(loadFuncPtr) + builder.InsertInstruction(loadModuleCtxPtr) + + args = args.Append(builder.VarLengthPool(), loadModuleCtxPtr.Return()) + args = args.Append(builder.VarLengthPool(), vs...) + + return true, sig, args, uint64(loadFuncPtr.Return()) + } +} + +func (c *Compiler) lowerCall(fnIndex uint32) { + builder := c.ssaBuilder + state := c.state() + isIndirect, sig, args, funcRefOrPtrValue := c.prepareCall(fnIndex) + + call := builder.AllocateInstruction() + if isIndirect { + call.AsCallIndirect(ssa.Value(funcRefOrPtrValue), sig, args) + } else { + call.AsCall(ssa.FuncRef(funcRefOrPtrValue), sig, args) + } + builder.InsertInstruction(call) + + first, rest := call.Returns() + if first.Valid() { + state.push(first) + } + for _, v := range rest { + state.push(v) + } + + c.reloadAfterCall() +} + +func (c *Compiler) prepareCallIndirect(typeIndex, tableIndex uint32) (ssa.Value, *wasm.FunctionType, ssa.Values) { builder := c.ssaBuilder state := c.state() @@ -3601,6 +3636,14 @@ func (c *Compiler) lowerCallIndirect(typeIndex, tableIndex uint32) { // into execContext.callerModuleContextPtr in case when the callee is a Go function. c.storeCallerModuleContext() + return executablePtr, typ, args +} + +func (c *Compiler) lowerCallIndirect(typeIndex, tableIndex uint32) { + builder := c.ssaBuilder + state := c.state() + executablePtr, typ, args := c.prepareCallIndirect(typeIndex, tableIndex) + call := builder.AllocateInstruction() call.AsCallIndirect(executablePtr, c.signatures[typ], args) builder.InsertInstruction(call) @@ -3616,6 +3659,62 @@ func (c *Compiler) lowerCallIndirect(typeIndex, tableIndex uint32) { c.reloadAfterCall() } +func (c *Compiler) lowerTailCallReturnCall(fnIndex uint32) { + isIndirect, sig, args, funcRefOrPtrValue := c.prepareCall(fnIndex) + builder := c.ssaBuilder + state := c.state() + + call := builder.AllocateInstruction() + if isIndirect { + call.AsTailCallReturnCallIndirect(ssa.Value(funcRefOrPtrValue), sig, args) + } else { + call.AsTailCallReturnCall(ssa.FuncRef(funcRefOrPtrValue), sig, args) + } + builder.InsertInstruction(call) + + // In a proper tail call, the following code is unreachable since execution + // transfers to the callee. However, sometimes the backend might need to fall back to + // a regular call, so we include return handling and let the backend delete it + // when redundant. + // For details, see internal/engine/RATIONALE.md + first, rest := call.Returns() + if first.Valid() { + state.push(first) + } + for _, v := range rest { + state.push(v) + } + + c.reloadAfterCall() + c.lowerReturn(builder) +} + +func (c *Compiler) lowerTailCallReturnCallIndirect(typeIndex, tableIndex uint32) { + builder := c.ssaBuilder + state := c.state() + executablePtr, typ, args := c.prepareCallIndirect(typeIndex, tableIndex) + + call := builder.AllocateInstruction() + call.AsTailCallReturnCallIndirect(executablePtr, c.signatures[typ], args) + builder.InsertInstruction(call) + + // In a proper tail call, the following code is unreachable since execution + // transfers to the callee. 
However, sometimes the backend might need to fall back to + // a regular call, so we include return handling and let the backend delete it + // when redundant. + // For details, see internal/engine/RATIONALE.md + first, rest := call.Returns() + if first.Valid() { + state.push(first) + } + for _, v := range rest { + state.push(v) + } + + c.reloadAfterCall() + c.lowerReturn(builder) +} + // memOpSetup inserts the bounds check and calculates the address of the memory operation (loads/stores). func (c *Compiler) memOpSetup(baseAddr ssa.Value, constOffset, operationSizeInBytes uint64) (address ssa.Value) { address = ssa.ValueInvalid diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go index 8811feed7..53206f1cc 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go @@ -174,20 +174,21 @@ func (m *moduleEngine) NewFunction(index wasm.Index) api.Function { indexInModule: index, executable: &p.executable[offset], parent: m, - preambleExecutable: &m.parent.entryPreambles[typIndex][0], + preambleExecutable: p.entryPreamblesPtrs[typIndex], sizeOfParamResultSlice: sizeOfParamResultSlice, requiredParams: typ.ParamNumInUint64, numberOfResults: typ.ResultNumInUint64, } - ce.execCtx.memoryGrowTrampolineAddress = &m.parent.sharedFunctions.memoryGrowExecutable[0] - ce.execCtx.stackGrowCallTrampolineAddress = &m.parent.sharedFunctions.stackGrowExecutable[0] - ce.execCtx.checkModuleExitCodeTrampolineAddress = &m.parent.sharedFunctions.checkModuleExitCode[0] - ce.execCtx.tableGrowTrampolineAddress = &m.parent.sharedFunctions.tableGrowExecutable[0] - ce.execCtx.refFuncTrampolineAddress = &m.parent.sharedFunctions.refFuncExecutable[0] - ce.execCtx.memoryWait32TrampolineAddress = &m.parent.sharedFunctions.memoryWait32Executable[0] - ce.execCtx.memoryWait64TrampolineAddress = &m.parent.sharedFunctions.memoryWait64Executable[0] - ce.execCtx.memoryNotifyTrampolineAddress = &m.parent.sharedFunctions.memoryNotifyExecutable[0] + sharedFunctions := p.sharedFunctions + ce.execCtx.memoryGrowTrampolineAddress = sharedFunctions.memoryGrowAddress + ce.execCtx.stackGrowCallTrampolineAddress = sharedFunctions.stackGrowAddress + ce.execCtx.checkModuleExitCodeTrampolineAddress = sharedFunctions.checkModuleExitCodeAddress + ce.execCtx.tableGrowTrampolineAddress = sharedFunctions.tableGrowAddress + ce.execCtx.refFuncTrampolineAddress = sharedFunctions.refFuncAddress + ce.execCtx.memoryWait32TrampolineAddress = sharedFunctions.memoryWait32Address + ce.execCtx.memoryWait64TrampolineAddress = sharedFunctions.memoryWait64Address + ce.execCtx.memoryNotifyTrampolineAddress = sharedFunctions.memoryNotifyAddress ce.execCtx.memmoveAddress = memmovPtr ce.init() return ce diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go index 9a3d1da6e..7b37a8afe 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go @@ -633,6 +633,14 @@ const ( // OpcodeFence is a memory fence operation. OpcodeFence + // OpcodeTailCallReturnCall is the equivalent of OpcodeCall (a "near" call) + // for tail calls. Semantically, it combines Call + Return into a single operation. 
+ OpcodeTailCallReturnCall + + // OpcodeTailCallReturnCallIndirect is the equivalent of OpcodeCallIndirect (a call to a function address) + // for tail calls. Semantically, it combines CallIndirect + Return into a single operation. + OpcodeTailCallReturnCallIndirect + // opcodeEnd marks the end of the opcode list. opcodeEnd ) @@ -679,12 +687,44 @@ func (op AtomicRmwOp) String() string { type returnTypesFn func(b *builder, instr *Instruction) (t1 Type, ts []Type) var ( - returnTypesFnNoReturns returnTypesFn = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return typeInvalid, nil } - returnTypesFnSingle = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return instr.typ, nil } - returnTypesFnI32 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeI32, nil } - returnTypesFnF32 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeF32, nil } - returnTypesFnF64 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeF64, nil } - returnTypesFnV128 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeV128, nil } + returnTypesFnNoReturns returnTypesFn = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return typeInvalid, nil } + returnTypesFnSingle = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return instr.typ, nil } + returnTypesFnI32 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeI32, nil } + returnTypesFnF32 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeF32, nil } + returnTypesFnF64 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeF64, nil } + returnTypesFnV128 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeV128, nil } + returnTypesFnCallIndirect = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { + sigID := SignatureID(instr.u1) + sig, ok := b.signatures[sigID] + if !ok { + panic("BUG") + } + switch len(sig.Results) { + case 0: + t1 = typeInvalid + case 1: + t1 = sig.Results[0] + default: + t1, ts = sig.Results[0], sig.Results[1:] + } + return + } + returnTypesFnCall = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { + sigID := SignatureID(instr.u2) + sig, ok := b.signatures[sigID] + if !ok { + panic("BUG") + } + switch len(sig.Results) { + case 0: + t1 = typeInvalid + case 1: + t1 = sig.Results[0] + default: + t1, ts = sig.Results[0], sig.Results[1:] + } + return + } ) // sideEffect provides the info to determine if an instruction has side effects which @@ -846,6 +886,8 @@ var instructionSideEffects = [opcodeEnd]sideEffect{ OpcodeAtomicStore: sideEffectStrict, OpcodeAtomicCas: sideEffectStrict, OpcodeFence: sideEffectStrict, + OpcodeTailCallReturnCall: sideEffectStrict, + OpcodeTailCallReturnCallIndirect: sideEffectStrict, OpcodeWideningPairwiseDotProductS: sideEffectNone, } @@ -860,105 +902,75 @@ func (i *Instruction) sideEffect() sideEffect { // instructionReturnTypes provides the function to determine the return types of an instruction. 
var instructionReturnTypes = [opcodeEnd]returnTypesFn{ - OpcodeExtIaddPairwise: returnTypesFnV128, - OpcodeVbor: returnTypesFnV128, - OpcodeVbxor: returnTypesFnV128, - OpcodeVband: returnTypesFnV128, - OpcodeVbnot: returnTypesFnV128, - OpcodeVbandnot: returnTypesFnV128, - OpcodeVbitselect: returnTypesFnV128, - OpcodeVanyTrue: returnTypesFnI32, - OpcodeVallTrue: returnTypesFnI32, - OpcodeVhighBits: returnTypesFnI32, - OpcodeVIadd: returnTypesFnV128, - OpcodeVSaddSat: returnTypesFnV128, - OpcodeVUaddSat: returnTypesFnV128, - OpcodeVIsub: returnTypesFnV128, - OpcodeVSsubSat: returnTypesFnV128, - OpcodeVUsubSat: returnTypesFnV128, - OpcodeVIcmp: returnTypesFnV128, - OpcodeVImin: returnTypesFnV128, - OpcodeVUmin: returnTypesFnV128, - OpcodeVImax: returnTypesFnV128, - OpcodeVUmax: returnTypesFnV128, - OpcodeVImul: returnTypesFnV128, - OpcodeVAvgRound: returnTypesFnV128, - OpcodeVIabs: returnTypesFnV128, - OpcodeVIneg: returnTypesFnV128, - OpcodeVIpopcnt: returnTypesFnV128, - OpcodeVIshl: returnTypesFnV128, - OpcodeVSshr: returnTypesFnV128, - OpcodeVUshr: returnTypesFnV128, - OpcodeExtractlane: returnTypesFnSingle, - OpcodeInsertlane: returnTypesFnV128, - OpcodeBand: returnTypesFnSingle, - OpcodeFcopysign: returnTypesFnSingle, - OpcodeBitcast: returnTypesFnSingle, - OpcodeBor: returnTypesFnSingle, - OpcodeBxor: returnTypesFnSingle, - OpcodeRotl: returnTypesFnSingle, - OpcodeRotr: returnTypesFnSingle, - OpcodeIshl: returnTypesFnSingle, - OpcodeSshr: returnTypesFnSingle, - OpcodeSdiv: returnTypesFnSingle, - OpcodeSrem: returnTypesFnSingle, - OpcodeUdiv: returnTypesFnSingle, - OpcodeUrem: returnTypesFnSingle, - OpcodeUshr: returnTypesFnSingle, - OpcodeJump: returnTypesFnNoReturns, - OpcodeUndefined: returnTypesFnNoReturns, - OpcodeIconst: returnTypesFnSingle, - OpcodeSelect: returnTypesFnSingle, - OpcodeSExtend: returnTypesFnSingle, - OpcodeUExtend: returnTypesFnSingle, - OpcodeSwidenLow: returnTypesFnV128, - OpcodeUwidenLow: returnTypesFnV128, - OpcodeSwidenHigh: returnTypesFnV128, - OpcodeUwidenHigh: returnTypesFnV128, - OpcodeSnarrow: returnTypesFnV128, - OpcodeUnarrow: returnTypesFnV128, - OpcodeSwizzle: returnTypesFnSingle, - OpcodeShuffle: returnTypesFnV128, - OpcodeSplat: returnTypesFnV128, - OpcodeIreduce: returnTypesFnSingle, - OpcodeFabs: returnTypesFnSingle, - OpcodeSqrt: returnTypesFnSingle, - OpcodeCeil: returnTypesFnSingle, - OpcodeFloor: returnTypesFnSingle, - OpcodeTrunc: returnTypesFnSingle, - OpcodeNearest: returnTypesFnSingle, - OpcodeCallIndirect: func(b *builder, instr *Instruction) (t1 Type, ts []Type) { - sigID := SignatureID(instr.u1) - sig, ok := b.signatures[sigID] - if !ok { - panic("BUG") - } - switch len(sig.Results) { - case 0: - t1 = typeInvalid - case 1: - t1 = sig.Results[0] - default: - t1, ts = sig.Results[0], sig.Results[1:] - } - return - }, - OpcodeCall: func(b *builder, instr *Instruction) (t1 Type, ts []Type) { - sigID := SignatureID(instr.u2) - sig, ok := b.signatures[sigID] - if !ok { - panic("BUG") - } - switch len(sig.Results) { - case 0: - t1 = typeInvalid - case 1: - t1 = sig.Results[0] - default: - t1, ts = sig.Results[0], sig.Results[1:] - } - return - }, + OpcodeExtIaddPairwise: returnTypesFnV128, + OpcodeVbor: returnTypesFnV128, + OpcodeVbxor: returnTypesFnV128, + OpcodeVband: returnTypesFnV128, + OpcodeVbnot: returnTypesFnV128, + OpcodeVbandnot: returnTypesFnV128, + OpcodeVbitselect: returnTypesFnV128, + OpcodeVanyTrue: returnTypesFnI32, + OpcodeVallTrue: returnTypesFnI32, + OpcodeVhighBits: returnTypesFnI32, + OpcodeVIadd: returnTypesFnV128, + 
OpcodeVSaddSat: returnTypesFnV128, + OpcodeVUaddSat: returnTypesFnV128, + OpcodeVIsub: returnTypesFnV128, + OpcodeVSsubSat: returnTypesFnV128, + OpcodeVUsubSat: returnTypesFnV128, + OpcodeVIcmp: returnTypesFnV128, + OpcodeVImin: returnTypesFnV128, + OpcodeVUmin: returnTypesFnV128, + OpcodeVImax: returnTypesFnV128, + OpcodeVUmax: returnTypesFnV128, + OpcodeVImul: returnTypesFnV128, + OpcodeVAvgRound: returnTypesFnV128, + OpcodeVIabs: returnTypesFnV128, + OpcodeVIneg: returnTypesFnV128, + OpcodeVIpopcnt: returnTypesFnV128, + OpcodeVIshl: returnTypesFnV128, + OpcodeVSshr: returnTypesFnV128, + OpcodeVUshr: returnTypesFnV128, + OpcodeExtractlane: returnTypesFnSingle, + OpcodeInsertlane: returnTypesFnV128, + OpcodeBand: returnTypesFnSingle, + OpcodeFcopysign: returnTypesFnSingle, + OpcodeBitcast: returnTypesFnSingle, + OpcodeBor: returnTypesFnSingle, + OpcodeBxor: returnTypesFnSingle, + OpcodeRotl: returnTypesFnSingle, + OpcodeRotr: returnTypesFnSingle, + OpcodeIshl: returnTypesFnSingle, + OpcodeSshr: returnTypesFnSingle, + OpcodeSdiv: returnTypesFnSingle, + OpcodeSrem: returnTypesFnSingle, + OpcodeUdiv: returnTypesFnSingle, + OpcodeUrem: returnTypesFnSingle, + OpcodeUshr: returnTypesFnSingle, + OpcodeJump: returnTypesFnNoReturns, + OpcodeUndefined: returnTypesFnNoReturns, + OpcodeIconst: returnTypesFnSingle, + OpcodeSelect: returnTypesFnSingle, + OpcodeSExtend: returnTypesFnSingle, + OpcodeUExtend: returnTypesFnSingle, + OpcodeSwidenLow: returnTypesFnV128, + OpcodeUwidenLow: returnTypesFnV128, + OpcodeSwidenHigh: returnTypesFnV128, + OpcodeUwidenHigh: returnTypesFnV128, + OpcodeSnarrow: returnTypesFnV128, + OpcodeUnarrow: returnTypesFnV128, + OpcodeSwizzle: returnTypesFnSingle, + OpcodeShuffle: returnTypesFnV128, + OpcodeSplat: returnTypesFnV128, + OpcodeIreduce: returnTypesFnSingle, + OpcodeFabs: returnTypesFnSingle, + OpcodeSqrt: returnTypesFnSingle, + OpcodeCeil: returnTypesFnSingle, + OpcodeFloor: returnTypesFnSingle, + OpcodeTrunc: returnTypesFnSingle, + OpcodeNearest: returnTypesFnSingle, + OpcodeCallIndirect: returnTypesFnCallIndirect, + OpcodeCall: returnTypesFnCall, OpcodeLoad: returnTypesFnSingle, OpcodeVZeroExtLoad: returnTypesFnV128, OpcodeLoadSplat: returnTypesFnV128, @@ -1032,6 +1044,8 @@ var instructionReturnTypes = [opcodeEnd]returnTypesFn{ OpcodeAtomicStore: returnTypesFnNoReturns, OpcodeAtomicCas: returnTypesFnSingle, OpcodeFence: returnTypesFnNoReturns, + OpcodeTailCallReturnCallIndirect: returnTypesFnCallIndirect, + OpcodeTailCallReturnCall: returnTypesFnCall, OpcodeWideningPairwiseDotProductS: returnTypesFnV128, } @@ -2038,6 +2052,25 @@ func (i *Instruction) AtomicTargetSize() (size uint64) { return i.u1 } +// AsTailCallReturnCall initializes this instruction as a call instruction with OpcodeTailCallReturnCall. +func (i *Instruction) AsTailCallReturnCall(ref FuncRef, sig *Signature, args Values) { + i.opcode = OpcodeTailCallReturnCall + i.u1 = uint64(ref) + i.vs = args + i.u2 = uint64(sig.ID) + sig.used = true +} + +// AsTailCallReturnCallIndirect initializes this instruction as a call-indirect instruction with OpcodeTailCallReturnCallIndirect. +func (i *Instruction) AsTailCallReturnCallIndirect(funcPtr Value, sig *Signature, args Values) *Instruction { + i.opcode = OpcodeTailCallReturnCallIndirect + i.vs = args + i.v = funcPtr + i.u1 = uint64(sig.ID) + sig.used = true + return i +} + // ReturnVals returns the return values of OpcodeReturn. 
func (i *Instruction) ReturnVals() []Value { return i.vs.View() @@ -2166,7 +2199,7 @@ func (i *Instruction) AsCall(ref FuncRef, sig *Signature, args Values) { // CallData returns the call data for this instruction necessary for backends. func (i *Instruction) CallData() (ref FuncRef, sigID SignatureID, args []Value) { - if i.opcode != OpcodeCall { + if i.opcode != OpcodeCall && i.opcode != OpcodeTailCallReturnCall { panic("BUG: CallData only available for OpcodeCall") } ref = FuncRef(i.u1) @@ -2195,8 +2228,8 @@ func (i *Instruction) AsCallGoRuntimeMemmove(funcPtr Value, sig *Signature, args // CallIndirectData returns the call indirect data for this instruction necessary for backends. func (i *Instruction) CallIndirectData() (funcPtr Value, sigID SignatureID, args []Value, isGoMemmove bool) { - if i.opcode != OpcodeCallIndirect { - panic("BUG: CallIndirectData only available for OpcodeCallIndirect") + if i.opcode != OpcodeCallIndirect && i.opcode != OpcodeTailCallReturnCallIndirect { + panic("BUG: CallIndirectData only available for OpcodeCallIndirect and OpcodeTailCallReturnCallIndirect") } funcPtr = i.v sigID = SignatureID(i.u1) @@ -2620,6 +2653,17 @@ func (i *Instruction) Format(b Builder) string { instSuffix = fmt.Sprintf("_%d, %s, %s, %s", 8*i.u1, i.v.Format(b), i.v2.Format(b), i.v3.Format(b)) case OpcodeFence: instSuffix = fmt.Sprintf(" %d", i.u1) + case OpcodeTailCallReturnCall, OpcodeTailCallReturnCallIndirect: + view := i.vs.View() + vs := make([]string, len(view)) + for idx := range vs { + vs[idx] = view[idx].Format(b) + } + if i.opcode == OpcodeCallIndirect { + instSuffix = fmt.Sprintf(" %s:%s, %s", i.v.Format(b), SignatureID(i.u1), strings.Join(vs, ", ")) + } else { + instSuffix = fmt.Sprintf(" %s:%s, %s", FuncRef(i.u1), SignatureID(i.u2), strings.Join(vs, ", ")) + } case OpcodeWideningPairwiseDotProductS: instSuffix = fmt.Sprintf(" %s, %s", i.v.Format(b), i.v2.Format(b)) default: @@ -2879,6 +2923,10 @@ func (o Opcode) String() (ret string) { return "AtomicStore" case OpcodeFence: return "Fence" + case OpcodeTailCallReturnCall: + return "ReturnCall" + case OpcodeTailCallReturnCallIndirect: + return "ReturnCallIndirect" case OpcodeVbor: return "Vbor" case OpcodeVbxor: diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/debug_options.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/debug_options.go index 2db61e219..783ab122a 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/debug_options.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/debug_options.go @@ -6,6 +6,7 @@ import ( "fmt" "math/rand" "os" + "sync" "time" ) @@ -91,7 +92,7 @@ type ( initialCompilationDone bool maybeRandomizedIndexes []int r *rand.Rand - values map[string]string + values sync.Map } verifierStateContextKey struct{} currentFunctionNameKey struct{} @@ -106,31 +107,24 @@ func NewDeterministicCompilationVerifierContext(ctx context.Context, localFuncti } r := rand.New(rand.NewSource(time.Now().UnixNano())) return context.WithValue(ctx, verifierStateContextKey{}, &verifierState{ - r: r, maybeRandomizedIndexes: maybeRandomizedIndexes, values: map[string]string{}, + r: r, maybeRandomizedIndexes: maybeRandomizedIndexes, values: sync.Map{}, }) } // DeterministicCompilationVerifierRandomizeIndexes randomizes the indexes for the deterministic compilation verifier. -// To get the randomized index, use DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex. 
-func DeterministicCompilationVerifierRandomizeIndexes(ctx context.Context) { +// Returns a slice that maps an index to the randomized index. +func DeterministicCompilationVerifierRandomizeIndexes(ctx context.Context) []int { state := ctx.Value(verifierStateContextKey{}).(*verifierState) if !state.initialCompilationDone { // If this is the first attempt, we use the index as-is order. state.initialCompilationDone = true - return + return state.maybeRandomizedIndexes } r := state.r r.Shuffle(len(state.maybeRandomizedIndexes), func(i, j int) { state.maybeRandomizedIndexes[i], state.maybeRandomizedIndexes[j] = state.maybeRandomizedIndexes[j], state.maybeRandomizedIndexes[i] }) -} - -// DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex returns the randomized index for the given `index` -// which is assigned by DeterministicCompilationVerifierRandomizeIndexes. -func DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex(ctx context.Context, index int) int { - state := ctx.Value(verifierStateContextKey{}).(*verifierState) - ret := state.maybeRandomizedIndexes[index] - return ret + return state.maybeRandomizedIndexes } // VerifyOrSetDeterministicCompilationContextValue verifies that the `newValue` is the same as the previous value for the given `scope` @@ -141,9 +135,8 @@ func VerifyOrSetDeterministicCompilationContextValue(ctx context.Context, scope fn := ctx.Value(currentFunctionNameKey{}).(string) key := fn + ": " + scope verifierCtx := ctx.Value(verifierStateContextKey{}).(*verifierState) - oldValue, ok := verifierCtx.values[key] - if !ok { - verifierCtx.values[key] = newValue + oldValue, loaded := verifierCtx.values.LoadOrStore(key, newValue) + if !loaded { return } if oldValue != newValue { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go index 313e34f9a..d67a3262d 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go @@ -69,7 +69,7 @@ type IDedPool[T any] struct { // NewIDedPool returns a new IDedPool. func NewIDedPool[T any](resetFn func(*T)) IDedPool[T] { - return IDedPool[T]{pool: NewPool[T](resetFn), maxIDEncountered: -1} + return IDedPool[T]{pool: NewPool(resetFn), maxIDEncountered: -1} } // GetOrAllocate returns the T with the given id. @@ -134,10 +134,10 @@ type VarLength[T any] struct { // NewVarLengthPool returns a new VarLengthPool. func NewVarLengthPool[T any]() VarLengthPool[T] { return VarLengthPool[T]{ - arrayPool: NewPool[varLengthPoolArray[T]](func(v *varLengthPoolArray[T]) { + arrayPool: NewPool(func(v *varLengthPoolArray[T]) { v.next = 0 }), - slicePool: NewPool[[]T](func(i *[]T) { + slicePool: NewPool(func(i *[]T) { *i = (*i)[:0] }), } @@ -155,6 +155,9 @@ func (p *VarLengthPool[T]) Allocate(knownMin int) VarLength[T] { return VarLength[T]{arr: arr} } slc := p.slicePool.Allocate() + if cap(*slc) < knownMin { + *slc = make([]T, 0, knownMin) + } return VarLength[T]{slc: slc} } @@ -166,39 +169,36 @@ func (p *VarLengthPool[T]) Reset() { // Append appends items to the backing slice just like the `append` builtin function in Go. func (i VarLength[T]) Append(p *VarLengthPool[T], items ...T) VarLength[T] { - if i.slc != nil { - *i.slc = append(*i.slc, items...) + slc := i.slc + if slc != nil { + *slc = append(*slc, items...) 
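Because local functions may now be compiled concurrently, the deterministic-compilation verifier's recorded values moved to a sync.Map, and the verify-or-set step above collapses into one LoadOrStore: the first writer wins and later callers compare against what was stored. A small sketch of that pattern with a hypothetical checker type, not the wazero verifier itself:

package main

import (
	"fmt"
	"sync"
)

// checker records the first value seen for each key and reports any later
// value that disagrees, safely from multiple goroutines.
type checker struct{ values sync.Map }

func (c *checker) verifyOrSet(key, newValue string) error {
	old, loaded := c.values.LoadOrStore(key, newValue)
	if !loaded {
		return nil // first observation: newValue is now the reference
	}
	if old.(string) != newValue {
		return fmt.Errorf("%s: got %q, previously %q", key, newValue, old)
	}
	return nil
}

func main() {
	var c checker
	var wg sync.WaitGroup
	for range 4 {
		wg.Add(1)
		go func() {
			defer wg.Done()
			_ = c.verifyOrSet("f1: after regalloc", "same-output")
		}()
	}
	wg.Wait()
	fmt.Println(c.verifyOrSet("f1: after regalloc", "different-output"))
}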
return i } - if i.arr == nil { - i.arr = p.arrayPool.Allocate() + arr := i.arr + if arr == nil { + arr = p.arrayPool.Allocate() + i.arr = arr } - arr := i.arr if arr.next+len(items) <= arraySize { - for _, item := range items { - arr.arr[arr.next] = item - arr.next++ - } + arr.next += copy(arr.arr[arr.next:], items) } else { - slc := p.slicePool.Allocate() + slc = p.slicePool.Allocate() // Copy the array to the slice. - for ptr := 0; ptr < arr.next; ptr++ { - *slc = append(*slc, arr.arr[ptr]) - } + *slc = append(*slc, arr.arr[:arr.next]...) + *slc = append(*slc, items...) i.slc = slc - *i.slc = append(*i.slc, items...) } return i } // View returns the backing slice. func (i VarLength[T]) View() []T { - if i.slc != nil { + if slc := i.slc; slc != nil { return *i.slc - } else if i.arr != nil { - arr := i.arr + } + if arr := i.arr; arr != nil { return arr.arr[:arr.next] } return nil @@ -207,9 +207,9 @@ func (i VarLength[T]) View() []T { // Cut cuts the backing slice to the given length. // Precondition: n <= len(i.backing). func (i VarLength[T]) Cut(n int) { - if i.slc != nil { - *i.slc = (*i.slc)[:n] - } else if i.arr != nil { - i.arr.next = n + if slc := i.slc; slc != nil { + *slc = (*slc)[:n] + } else if arr := i.arr; arr != nil { + arr.next = n } } |
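The rewritten VarLength.Append above replaces the element-by-element copy loop with arr.next += copy(arr.arr[arr.next:], items), relying on copy returning how many elements it actually moved. A tiny standalone illustration of that idiom (hypothetical helper, not wazero's pool types):

package main

import "fmt"

// appendInto writes items into the fixed-size backing slice starting at *next,
// advancing *next by however many elements actually fit; copy's return value
// does the bookkeeping and the bounds clamping in one step.
func appendInto(backing []int, next *int, items ...int) (fit bool) {
	n := copy(backing[*next:], items)
	*next += n
	return n == len(items)
}

func main() {
	var backing [8]int
	next := 0
	fmt.Println(appendInto(backing[:], &next, 1, 2, 3), next)          // true 3
	fmt.Println(appendInto(backing[:], &next, 4, 5, 6, 7, 8, 9), next) // false 8
}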
