path: root/vendor/github.com/tetratelabs/wazero/internal/engine
author     kim <grufwub@gmail.com>  2025-11-10 07:29:48 +0100
committer  tobi <tobi.smethurst@protonmail.com>  2025-11-17 14:14:33 +0100
commit     6a3b09a507aca0498845d9118a21a82bb5054301 (patch)
tree       5297960ecfe66f723179eb5a1a6f8d59504c3433 /vendor/github.com/tetratelabs/wazero/internal/engine
parent     [performance] add optional S3 object info caching (#4546) (diff)
download   gotosocial-6a3b09a507aca0498845d9118a21a82bb5054301.tar.xz
[chore] update dependencies (#4547)
- codeberg.org/gruf/go-ffmpreg: v0.6.12 -> v0.6.14
- github.com/ncruces/go-sqlite3: v0.30.0 -> v0.30.1
- github.com/wazero/wazero: v1.9.0 -> v1.10.0

Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4547
Co-authored-by: kim <grufwub@gmail.com>
Co-committed-by: kim <grufwub@gmail.com>
Diffstat (limited to 'vendor/github.com/tetratelabs/wazero/internal/engine')
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go | 45
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go | 161
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/operations.go | 33
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/signature.go | 4
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go | 11
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go | 54
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go | 33
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go | 101
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go | 31
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go | 65
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go | 28
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go | 8
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go | 3
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go | 19
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go | 10
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine.go | 633
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go | 29
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go | 2
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go | 339
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go | 19
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go | 264
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/debug_options.go | 25
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go | 48
23 files changed, 1311 insertions, 654 deletions
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go
index 4e20e4b2c..4269d237b 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go
@@ -814,6 +814,7 @@ operatorSwitch:
c.emit(
newOperationCallIndirect(typeIndex, tableIndex),
)
+
case wasm.OpcodeDrop:
r := inclusiveRange{Start: 0, End: 0}
if peekValueType == unsignedTypeV128 {
@@ -3423,6 +3424,45 @@ operatorSwitch:
default:
return fmt.Errorf("unsupported atomic instruction in interpreterir: %s", wasm.AtomicInstructionName(atomicOp))
}
+
+ case wasm.OpcodeTailCallReturnCall:
+ fdef := c.module.FunctionDefinition(index)
+ functionFrame := c.controlFrames.functionFrame()
+ // Currently we do not support imported functions as tail-call targets; we treat them as regular calls.
+ // For details, see internal/engine/RATIONALE.md
+ if _, _, isImport := fdef.Import(); isImport {
+ c.emit(newOperationCall(index))
+ dropOp := newOperationDrop(c.getFrameDropRange(functionFrame, false))
+
+ // Clean up the stack and then jump to the function frame's continuation (meaning return).
+ c.emit(dropOp)
+ c.emit(newOperationBr(functionFrame.asLabel()))
+ } else {
+ c.emit(newOperationTailCallReturnCall(index))
+ }
+
+ // The return operation is stack-polymorphic, so we mark the state as unreachable.
+ // That means subsequent instructions in the current control frame are "unreachable"
+ // and can be safely removed.
+ c.markUnreachable()
+
+ case wasm.OpcodeTailCallReturnCallIndirect:
+ typeIndex := index
+ tableIndex, n, err := leb128.LoadUint32(c.body[c.pc+1:])
+ if err != nil {
+ return fmt.Errorf("read table index for return_call_indirect: %w", err)
+ }
+ c.pc += n
+
+ functionFrame := c.controlFrames.functionFrame()
+ dropRange := c.getFrameDropRange(functionFrame, false)
+ c.emit(newOperationTailCallReturnCallIndirect(typeIndex, tableIndex, dropRange, functionFrame.asLabel()))
+
+ // The return operation is stack-polymorphic, so we mark the state as unreachable.
+ // That means subsequent instructions in the current control frame are "unreachable"
+ // and can be safely removed.
+ c.markUnreachable()
+
default:
return fmt.Errorf("unsupported instruction in interpreterir: 0x%x", op)
}
@@ -3449,7 +3489,10 @@ func (c *compiler) applyToStack(opcode wasm.Opcode) (index uint32, err error) {
wasm.OpcodeLocalSet,
wasm.OpcodeLocalTee,
wasm.OpcodeGlobalGet,
- wasm.OpcodeGlobalSet:
+ wasm.OpcodeGlobalSet,
+ // tail-call proposal
+ wasm.OpcodeTailCallReturnCall,
+ wasm.OpcodeTailCallReturnCallIndirect:
// Assumes that we are at the opcode now, so skip it before reading the immediates.
v, num, err := leb128.LoadUint32(c.body[c.pc+1:])
if err != nil {
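The new opcodes join the instructions whose immediates applyToStack skips using leb128.LoadUint32, which returns both the decoded value and the number of bytes consumed so that c.pc can be advanced past the immediate. As a rough illustration of those semantics, here is a hypothetical stand-in decoder (not wazero's internal/leb128 package):

package main

import "fmt"

// loadUint32 decodes an unsigned LEB128 value from buf, returning the value
// and the number of bytes consumed (assumed to mirror leb128.LoadUint32).
func loadUint32(buf []byte) (uint32, uint64, error) {
    var result uint32
    var shift uint
    for i, b := range buf {
        if i >= 5 { // a uint32 fits in at most 5 LEB128 bytes
            return 0, 0, fmt.Errorf("malformed LEB128 uint32: too long")
        }
        result |= uint32(b&0x7f) << shift
        if b&0x80 == 0 { // high bit clear: this was the last byte
            return result, uint64(i + 1), nil
        }
        shift += 7
    }
    return 0, 0, fmt.Errorf("malformed LEB128 uint32: unexpected end of input")
}

func main() {
    v, n, _ := loadUint32([]byte{0xe5, 0x8e, 0x26}) // 624485 in LEB128
    fmt.Println(v, n)                               // 624485 3
}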
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go
index 5b5e6e9d0..6f2fa949a 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go
@@ -7,6 +7,7 @@ import (
"fmt"
"math"
"math/bits"
+ "slices"
"sync"
"unsafe"
@@ -27,27 +28,37 @@ import (
// The default value should suffice for most use cases. Those wishing to change this can do so via `go build -ldflags`.
var callStackCeiling = 2000
+type compiledFunctionWithCount struct {
+ funcs []compiledFunction
+ refCount int
+}
+
// engine is an interpreter implementation of wasm.Engine
type engine struct {
enabledFeatures api.CoreFeatures
- compiledFunctions map[wasm.ModuleID][]compiledFunction // guarded by mutex.
- mux sync.RWMutex
+ compiledFunctions map[wasm.ModuleID]*compiledFunctionWithCount // guarded by mutex.
+ mux sync.Mutex
}
func NewEngine(_ context.Context, enabledFeatures api.CoreFeatures, _ filecache.Cache) wasm.Engine {
return &engine{
enabledFeatures: enabledFeatures,
- compiledFunctions: map[wasm.ModuleID][]compiledFunction{},
+ compiledFunctions: map[wasm.ModuleID]*compiledFunctionWithCount{},
}
}
// Close implements the same method as documented on wasm.Engine.
func (e *engine) Close() (err error) {
+ e.mux.Lock()
+ defer e.mux.Unlock()
+ clear(e.compiledFunctions)
return
}
// CompiledModuleCount implements the same method as documented on wasm.Engine.
func (e *engine) CompiledModuleCount() uint32 {
+ e.mux.Lock()
+ defer e.mux.Unlock()
return uint32(len(e.compiledFunctions))
}
@@ -59,19 +70,33 @@ func (e *engine) DeleteCompiledModule(m *wasm.Module) {
func (e *engine) deleteCompiledFunctions(module *wasm.Module) {
e.mux.Lock()
defer e.mux.Unlock()
+ cf, ok := e.compiledFunctions[module.ID]
+ if !ok {
+ return
+ }
+ cf.refCount--
+ if cf.refCount > 0 {
+ return
+ }
delete(e.compiledFunctions, module.ID)
}
func (e *engine) addCompiledFunctions(module *wasm.Module, fs []compiledFunction) {
e.mux.Lock()
defer e.mux.Unlock()
- e.compiledFunctions[module.ID] = fs
+ e.compiledFunctions[module.ID] = &compiledFunctionWithCount{funcs: fs, refCount: 1}
}
-func (e *engine) getCompiledFunctions(module *wasm.Module) (fs []compiledFunction, ok bool) {
- e.mux.RLock()
- defer e.mux.RUnlock()
- fs, ok = e.compiledFunctions[module.ID]
+func (e *engine) getCompiledFunctions(module *wasm.Module, increaseRefCount bool) (fs []compiledFunction, ok bool) {
+ e.mux.Lock()
+ defer e.mux.Unlock()
+ cf, ok := e.compiledFunctions[module.ID]
+ if ok {
+ fs = cf.funcs
+ if increaseRefCount {
+ cf.refCount++
+ }
+ }
return
}
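The interpreter engine now reference-counts each module's compiled functions: CompileModule pins the entry on a cache hit, and deleteCompiledFunctions only evicts it once every instance that pinned it has been released. A minimal sketch of that pattern, using hypothetical generic types rather than the engine's actual ones:

package main

import (
    "fmt"
    "sync"
)

type entry[V any] struct {
    val      V
    refCount int
}

// refCache is a reference-counted cache: Get optionally pins an entry,
// and Release only deletes it once every pin has been dropped.
type refCache[K comparable, V any] struct {
    mu sync.Mutex
    m  map[K]*entry[V]
}

func newRefCache[K comparable, V any]() *refCache[K, V] {
    return &refCache[K, V]{m: map[K]*entry[V]{}}
}

func (c *refCache[K, V]) Add(k K, v V) {
    c.mu.Lock()
    defer c.mu.Unlock()
    c.m[k] = &entry[V]{val: v, refCount: 1}
}

func (c *refCache[K, V]) Get(k K, pin bool) (V, bool) {
    c.mu.Lock()
    defer c.mu.Unlock()
    e, ok := c.m[k]
    if !ok {
        var zero V
        return zero, false
    }
    if pin {
        e.refCount++
    }
    return e.val, true
}

func (c *refCache[K, V]) Release(k K) {
    c.mu.Lock()
    defer c.mu.Unlock()
    if e, ok := c.m[k]; ok {
        e.refCount--
        if e.refCount <= 0 {
            delete(c.m, k)
        }
    }
}

func main() {
    c := newRefCache[string, int]()
    c.Add("mod", 42)
    v, _ := c.Get("mod", true) // a second instance pins the entry
    c.Release("mod")           // first release: still cached
    _, stillThere := c.Get("mod", false)
    c.Release("mod") // last release: evicted
    _, gone := c.Get("mod", false)
    fmt.Println(v, stillThere, gone) // 42 true false
}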
@@ -242,15 +267,9 @@ type snapshot struct {
// Snapshot implements the same method as documented on experimental.Snapshotter.
func (ce *callEngine) Snapshot() experimental.Snapshot {
- stack := make([]uint64, len(ce.stack))
- copy(stack, ce.stack)
-
- frames := make([]*callFrame, len(ce.frames))
- copy(frames, ce.frames)
-
return &snapshot{
- stack: stack,
- frames: frames,
+ stack: slices.Clone(ce.stack),
+ frames: slices.Clone(ce.frames),
ce: ce,
}
}
@@ -356,7 +375,7 @@ const callFrameStackSize = 0
// CompileModule implements the same method as documented on wasm.Engine.
func (e *engine) CompileModule(_ context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) error {
- if _, ok := e.getCompiledFunctions(module); ok { // cache hit!
+ if _, ok := e.getCompiledFunctions(module, true); ok { // cache hit!
return nil
}
@@ -405,7 +424,7 @@ func (e *engine) NewModuleEngine(module *wasm.Module, instance *wasm.ModuleInsta
functions: make([]function, len(module.FunctionSection)+int(module.ImportFunctionCount)),
}
- codes, ok := e.getCompiledFunctions(module)
+ codes, ok := e.getCompiledFunctions(module, false)
if !ok {
return nil, errors.New("source module must be compiled before instantiation")
}
@@ -427,12 +446,10 @@ func (e *engine) NewModuleEngine(module *wasm.Module, instance *wasm.ModuleInsta
// lowerIR lowers the interpreterir operations to engine friendly struct.
func (e *engine) lowerIR(ir *compilationResult, ret *compiledFunction) error {
// Copy the body from the result.
- ret.body = make([]unionOperation, len(ir.Operations))
- copy(ret.body, ir.Operations)
+ ret.body = slices.Clone(ir.Operations)
// Also copy the offsets if necessary.
if offsets := ir.IROperationSourceOffsetsInWasmBinary; len(offsets) > 0 {
- ret.offsetsInWasmBinary = make([]uint64, len(offsets))
- copy(ret.offsetsInWasmBinary, offsets)
+ ret.offsetsInWasmBinary = slices.Clone(offsets)
}
labelAddressResolutions := [labelKindNum][]uint64{}
@@ -449,9 +466,7 @@ func (e *engine) lowerIR(ir *compilationResult, ret *compiledFunction) error {
frameToAddresses := labelAddressResolutions[label.Kind()]
// Expand the slice if necessary.
if diff := fid - len(frameToAddresses) + 1; diff > 0 {
- for j := 0; j < diff; j++ {
- frameToAddresses = append(frameToAddresses, 0)
- }
+ frameToAddresses = append(frameToAddresses, make([]uint64, diff)...)
}
frameToAddresses[fid] = address
labelAddressResolutions[kind] = frameToAddresses
@@ -472,6 +487,8 @@ func (e *engine) lowerIR(ir *compilationResult, ret *compiledFunction) error {
target := op.Us[j]
e.setLabelAddress(&op.Us[j], label(target), labelAddressResolutions)
}
+ case operationKindTailCallReturnCallIndirect:
+ e.setLabelAddress(&op.Us[1], label(op.Us[1]), labelAddressResolutions)
}
}
return nil
@@ -761,18 +778,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance
case operationKindCallIndirect:
offset := ce.popValue()
table := tables[op.U2]
- if offset >= uint64(len(table.References)) {
- panic(wasmruntime.ErrRuntimeInvalidTableAccess)
- }
- rawPtr := table.References[offset]
- if rawPtr == 0 {
- panic(wasmruntime.ErrRuntimeInvalidTableAccess)
- }
-
- tf := functionFromUintptr(rawPtr)
- if tf.typeID != typeIDs[op.U1] {
- panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
- }
+ tf := ce.functionForOffset(table, offset, typeIDs[op.U1])
ce.callFunction(ctx, f.moduleInstance, tf)
frame.pc++
@@ -1725,12 +1731,17 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance
if fillSize+offset > uint64(len(memoryInst.Buffer)) {
panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
} else if fillSize != 0 {
- // Uses the copy trick for faster filling buffer.
- // https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
+ // Uses the copy trick to fill the buffer with the value quickly.
+ // https://github.com/golang/go/blob/go1.24.0/src/bytes/bytes.go#L664-L673
buf := memoryInst.Buffer[offset : offset+fillSize]
- buf[0] = value
- for i := 1; i < len(buf); i *= 2 {
- copy(buf[i:], buf[:i])
+ if value == 0 {
+ clear(buf)
+ } else {
+ buf[0] = value
+ for i := 1; i < len(buf); {
+ chunk := min(i, 8192)
+ i += copy(buf[i:], buf[:chunk])
+ }
}
}
frame.pc++
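The memory.fill path above special-cases zero so it can use the built-in clear, and otherwise doubles an already-filled prefix while capping each copy at 8 KiB, which keeps the source of each copy cache-resident for large fills. A standalone sketch of the same trick (illustrative name, Go 1.21+ builtins):

package main

import "fmt"

// fillBytes fills buf with value by doubling an already-filled prefix,
// copying at most 8 KiB per step (the same approach as the diff above).
func fillBytes(buf []byte, value byte) {
    if len(buf) == 0 {
        return
    }
    if value == 0 {
        clear(buf) // zeroing has a dedicated fast path
        return
    }
    buf[0] = value
    for i := 1; i < len(buf); {
        chunk := min(i, 8192)
        i += copy(buf[i:], buf[:chunk])
    }
}

func main() {
    b := make([]byte, 10)
    fillBytes(b, 0x7f)
    fmt.Println(b) // [127 127 127 127 127 127 127 127 127 127]
}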
@@ -1804,7 +1815,7 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
} else if num > 0 {
// Uses the copy trick for faster filling the region with the value.
- // https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
+ // https://github.com/golang/go/blob/go1.24.0/src/slices/slices.go#L514-L517
targetRegion := table.References[offset : offset+num]
targetRegion[0] = ref
for i := 1; i < len(targetRegion); i *= 2 {
@@ -4331,6 +4342,32 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance
memoryInst.Mux.Unlock()
ce.pushValue(uint64(old))
frame.pc++
+ case operationKindTailCallReturnCall:
+ f := &functions[op.U1]
+ ce.dropForTailCall(frame, f)
+ body, bodyLen = ce.resetPc(frame, f)
+
+ case operationKindTailCallReturnCallIndirect:
+ offset := ce.popValue()
+ table := tables[op.U2]
+ tf := ce.functionForOffset(table, offset, typeIDs[op.U1])
+
+ // We are allowing proper tail calls only across functions that belong to the same
+ // module; for indirect calls, we have to enforce it at run-time.
+ // For details, see internal/engine/RATIONALE.md
+ if tf.moduleInstance != f.moduleInstance {
+ // Revert to a normal call.
+ ce.callFunction(ctx, f.moduleInstance, tf)
+ // Return
+ ce.drop(op.Us[0])
+ // Jump to the function frame (return)
+ frame.pc = op.Us[1]
+ continue
+ }
+
+ ce.dropForTailCall(frame, tf)
+ body, bodyLen = ce.resetPc(frame, tf)
+
default:
frame.pc++
}
@@ -4338,6 +4375,40 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance
ce.popFrame()
}
+func (ce *callEngine) dropForTailCall(frame *callFrame, f *function) {
+ base := frame.base - frame.f.funcType.ParamNumInUint64
+ paramCount := f.funcType.ParamNumInUint64
+ ce.stack = append(ce.stack[:base], ce.stack[len(ce.stack)-paramCount:]...)
+}
+
+func (ce *callEngine) resetPc(frame *callFrame, f *function) (body []unionOperation, bodyLen uint64) {
+ // The compiler is currently allowing proper tail call only across functions
+ // that belong to the same module; thus, we can overwrite the frame in-place.
+ // For details, see internal/engine/RATIONALE.md
+ frame.f = f
+ frame.base = len(ce.stack)
+ frame.pc = 0
+ body = frame.f.parent.body
+ bodyLen = uint64(len(body))
+ return body, bodyLen
+}
+
+func (ce *callEngine) functionForOffset(table *wasm.TableInstance, offset uint64, expectedTypeID wasm.FunctionTypeID) *function {
+ if offset >= uint64(len(table.References)) {
+ panic(wasmruntime.ErrRuntimeInvalidTableAccess)
+ }
+ rawPtr := table.References[offset]
+ if rawPtr == 0 {
+ panic(wasmruntime.ErrRuntimeInvalidTableAccess)
+ }
+
+ tf := functionFromUintptr(rawPtr)
+ if tf.typeID != expectedTypeID {
+ panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
+ }
+ return tf
+}
+
func wasmCompatMax32bits(v1, v2 uint32) uint64 {
return uint64(math.Float32bits(moremath.WasmCompatMax32(
math.Float32frombits(v1),
@@ -4564,9 +4635,7 @@ func (ce *callEngine) callGoFuncWithStack(ctx context.Context, m *wasm.ModuleIns
// In the interpreter engine, ce.stack may only have capacity to store
// parameters. Grow when there are more results than parameters.
if growLen := resultLen - paramLen; growLen > 0 {
- for i := 0; i < growLen; i++ {
- ce.stack = append(ce.stack, 0)
- }
+ ce.stack = append(ce.stack, make([]uint64, growLen)...)
stackLen += growLen
}
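The tail-call helpers added above work together: dropForTailCall keeps only the callee's arguments, which sit on top of the stack, and slides them down over the caller's parameter/local region, then resetPc reuses the call frame in place. A toy sketch of that slice shuffle with made-up values (the real frame layout uses funcType.ParamNumInUint64 and frame.base):

package main

import "fmt"

// tailCallShuffle drops the region [base, len-paramCount) belonging to the
// caller and keeps only the new callee's arguments from the top of the stack.
func tailCallShuffle(stack []uint64, base, paramCount int) []uint64 {
    return append(stack[:base], stack[len(stack)-paramCount:]...)
}

func main() {
    // 99 belongs to an outer frame; 10, 20, 30 are the caller's params/locals;
    // 7 and 8 are the values left on top of the stack for the callee.
    stack := []uint64{99, 10, 20, 30, 7, 8}
    fmt.Println(tailCallShuffle(stack, 1, 2)) // [99 7 8]
}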
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/operations.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/operations.go
index 3087a718f..db3cfa250 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/operations.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/operations.go
@@ -445,6 +445,10 @@ func (o operationKind) String() (ret string) {
ret = "operationKindAtomicRMW8Cmpxchg"
case operationKindAtomicRMW16Cmpxchg:
ret = "operationKindAtomicRMW16Cmpxchg"
+ case operationKindTailCallReturnCall:
+ ret = "operationKindTailCallReturnCall"
+ case operationKindTailCallReturnCallIndirect:
+ ret = "operationKindTailCallReturnCallIndirect"
default:
panic(fmt.Errorf("unknown operation %d", o))
}
@@ -768,6 +772,11 @@ const (
// operationKindAtomicRMW16Cmpxchg is the kind for NewOperationAtomicRMW16Cmpxchg.
operationKindAtomicRMW16Cmpxchg
+ // operationKindTailCallReturnCall is the Kind for newOperationTailCallReturnCall.
+ operationKindTailCallReturnCall
+ // operationKindTailCallReturnCallIndirect is the Kind for newOperationTailCallReturnCallIndirect.
+ operationKindTailCallReturnCallIndirect
+
// operationKindEnd is always placed at the bottom of this iota definition to be used in the test.
operationKindEnd
)
@@ -1097,6 +1106,12 @@ func (o unionOperation) String() string {
operationKindAtomicRMW16Cmpxchg:
return o.Kind.String()
+ case operationKindTailCallReturnCall:
+ return fmt.Sprintf("%s %d %s", o.Kind, o.U1, label(o.U2).String())
+
+ case operationKindTailCallReturnCallIndirect:
+ return fmt.Sprintf("%s %d %d", o.Kind, o.U1, o.U2)
+
default:
panic(fmt.Sprintf("TODO: %v", o.Kind))
}
@@ -2810,3 +2825,21 @@ func newOperationAtomicRMW8Cmpxchg(unsignedType unsignedType, arg memoryArg) uni
func newOperationAtomicRMW16Cmpxchg(unsignedType unsignedType, arg memoryArg) unionOperation {
return unionOperation{Kind: operationKindAtomicRMW16Cmpxchg, B1: byte(unsignedType), U1: uint64(arg.Alignment), U2: uint64(arg.Offset)}
}
+
+// newOperationTailCallReturnCall is a constructor for unionOperation with operationKindTailCallReturnCall.
+//
+// This corresponds to
+//
+// wasm.OpcodeTailCallReturnCall.
+func newOperationTailCallReturnCall(functionIndex uint32) unionOperation {
+ return unionOperation{Kind: operationKindTailCallReturnCall, U1: uint64(functionIndex)}
+}
+
+// newOperationTailCallReturnCallIndirect is a constructor for unionOperation with operationKindTailCallReturnCallIndirect.
+//
+// This corresponds to
+//
+// wasm.OpcodeTailCallReturnCallIndirect.
+func newOperationTailCallReturnCallIndirect(typeIndex, tableIndex uint32, dropDepth inclusiveRange, l label) unionOperation {
+ return unionOperation{Kind: operationKindTailCallReturnCallIndirect, U1: uint64(typeIndex), U2: uint64(tableIndex), Us: []uint64{dropDepth.AsU64(), uint64(l)}}
+}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/signature.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/signature.go
index 7b9d5602d..da5ca3c15 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/signature.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/signature.go
@@ -272,9 +272,9 @@ func (c *compiler) wasmOpcodeSignature(op wasm.Opcode, index uint32) (*signature
return signature_I32_None, nil
case wasm.OpcodeReturn:
return signature_None_None, nil
- case wasm.OpcodeCall:
+ case wasm.OpcodeCall, wasm.OpcodeTailCallReturnCall:
return c.funcTypeToSigs.get(c.funcs[index], false /* direct */), nil
- case wasm.OpcodeCallIndirect:
+ case wasm.OpcodeCallIndirect, wasm.OpcodeTailCallReturnCallIndirect:
return c.funcTypeToSigs.get(index, true /* call_indirect */), nil
case wasm.OpcodeDrop:
return signature_Unknown_None, nil
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go
index 62d365015..8e3f08efc 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go
@@ -88,7 +88,7 @@ type Compiler interface {
MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode
// AddRelocationInfo appends the relocation information for the function reference at the current buffer offset.
- AddRelocationInfo(funcRef ssa.FuncRef)
+ AddRelocationInfo(funcRef ssa.FuncRef, isTailCall bool)
// AddSourceOffsetInfo appends the source offset information for the given offset.
AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset)
@@ -115,6 +115,8 @@ type RelocationInfo struct {
Offset int64
// Target is the target function of the call instruction.
FuncRef ssa.FuncRef
+ // IsTailCall indicates whether the call instruction is a tail call.
+ IsTailCall bool
}
// compiler implements Compiler.
@@ -352,10 +354,11 @@ func (c *compiler) SourceOffsetInfo() []SourceOffsetInfo {
}
// AddRelocationInfo implements Compiler.AddRelocationInfo.
-func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef) {
+func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef, isTailCall bool) {
c.relocations = append(c.relocations, RelocationInfo{
- Offset: int64(len(c.buf)),
- FuncRef: funcRef,
+ Offset: int64(len(c.buf)),
+ FuncRef: funcRef,
+ IsTailCall: isTailCall,
})
}
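RelocationInfo.Offset records where the 4-byte placeholder for a call/jump target begins in the output buffer; once the final layout of all functions is known, the backend patches a signed displacement there. On x86-64 the displacement is relative to the end of the instruction, so a plausible patch step looks like this (a sketch, not wazero's ResolveRelocations):

package main

import (
    "encoding/binary"
    "fmt"
)

// patchRel32 writes the signed 32-bit displacement for a call/jmp whose
// 4-byte immediate starts at relOffset, targeting absolute offset target.
// x86-64 computes the branch target relative to the end of the instruction.
func patchRel32(code []byte, relOffset, target int) {
    disp := int32(target - (relOffset + 4))
    binary.LittleEndian.PutUint32(code[relOffset:], uint32(disp))
}

func main() {
    code := make([]byte, 16)
    code[0] = 0xe9 // jmp rel32; placeholder bytes at offsets 1..4
    patchRel32(code, 1, 16)
    fmt.Printf("% x\n", code[:5]) // e9 0b 00 00 00  (16 - 5 = 11)
}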
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go
index 6a3e58f51..901c87aaf 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go
@@ -21,7 +21,9 @@ type instruction struct {
func (i *instruction) IsCall() bool { return i.kind == call }
// IsIndirectCall implements regalloc.Instr.
-func (i *instruction) IsIndirectCall() bool { return i.kind == callIndirect }
+func (i *instruction) IsIndirectCall() bool {
+ return i.kind == callIndirect
+}
// IsReturn implements regalloc.Instr.
func (i *instruction) IsReturn() bool { return i.kind == ret }
@@ -288,6 +290,11 @@ func (i *instruction) String() string {
case nopUseReg:
return fmt.Sprintf("nop_use_reg %s", i.op1.format(true))
+ case tailCall:
+ return fmt.Sprintf("tailCall %s", ssa.FuncRef(i.u1))
+ case tailCallIndirect:
+ return fmt.Sprintf("tailCallIndirect %s", i.op1.format(true))
+
default:
panic(fmt.Sprintf("BUG: %d", int(i.kind)))
}
@@ -357,7 +364,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
default:
panic(fmt.Sprintf("BUG: invalid operand: %s", i))
}
- case useKindCallInd:
+ case useKindCallInd, useKindTailCallInd:
op := i.op1
switch op.kind {
case operandKindReg:
@@ -428,13 +435,16 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
func (i *instruction) AssignUse(index int, v regalloc.VReg) {
switch uk := useKinds[i.kind]; uk {
case useKindNone:
- case useKindCallInd:
+ case useKindCallInd, useKindTailCallInd:
if index != 0 {
panic("BUG")
}
op := &i.op1
switch op.kind {
case operandKindReg:
+ if uk == useKindTailCallInd && v != r11VReg {
+ panic("BUG")
+ }
op.setReg(v)
case operandKindMem:
op.addressMode().assignUses(index, v)
@@ -838,6 +848,12 @@ const (
// nopUseReg is a meta instruction that uses one register and does nothing.
nopUseReg
+ // tailCall is a meta instruction that emits a tail call.
+ tailCall
+
+ // tailCallIndirect is a meta instruction that emits an indirect tail call.
+ tailCallIndirect
+
instrMax
)
@@ -1079,6 +1095,10 @@ func (k instructionKind) String() string {
return "lockcmpxchg"
case lockxadd:
return "lockxadd"
+ case tailCall:
+ return "tailCall"
+ case tailCallIndirect:
+ return "tailCallIndirect"
default:
panic("BUG")
}
@@ -1173,6 +1193,27 @@ func (i *instruction) asCallIndirect(ptr operand, abi *backend.FunctionABI) *ins
return i
}
+func (i *instruction) asTailCallReturnCall(ref ssa.FuncRef, abi *backend.FunctionABI) *instruction {
+ i.kind = tailCall
+ i.u1 = uint64(ref)
+ if abi != nil {
+ i.u2 = abi.ABIInfoAsUint64()
+ }
+ return i
+}
+
+func (i *instruction) asTailCallReturnCallIndirect(ptr operand, abi *backend.FunctionABI) *instruction {
+ if ptr.kind != operandKindReg && ptr.kind != operandKindMem {
+ panic("BUG")
+ }
+ i.kind = tailCallIndirect
+ i.op1 = ptr
+ if abi != nil {
+ i.u2 = abi.ABIInfoAsUint64()
+ }
+ return i
+}
+
func (i *instruction) asRet() *instruction {
i.kind = ret
return i
@@ -2342,6 +2383,8 @@ var defKinds = [instrMax]defKind{
lockxadd: defKindNone,
neg: defKindNone,
nopUseReg: defKindNone,
+ tailCall: defKindCall,
+ tailCallIndirect: defKindCall,
}
// String implements fmt.Stringer.
@@ -2375,6 +2418,7 @@ const (
useKindBlendvpd
useKindCall
useKindCallInd
+ useKindTailCallInd
useKindFcvtToSintSequence
useKindFcvtToUintSequence
)
@@ -2425,6 +2469,8 @@ var useKinds = [instrMax]useKind{
lockxadd: useKindOp1RegOp2,
neg: useKindOp1,
nopUseReg: useKindOp1,
+ tailCall: useKindCall,
+ tailCallIndirect: useKindTailCallInd,
}
func (u useKind) String() string {
@@ -2441,6 +2487,8 @@ func (u useKind) String() string {
return "call"
case useKindCallInd:
return "callInd"
+ case useKindTailCallInd:
+ return "tailCallInd"
default:
return "invalid"
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go
index 6637b428c..d1eefbdb5 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go
@@ -1211,7 +1211,7 @@ func (i *instruction) encode(c backend.Compiler) (needsLabelResolution bool) {
case call:
c.EmitByte(0xe8)
// Meaning that the call target is a function value, and requires relocation.
- c.AddRelocationInfo(ssa.FuncRef(i.u1))
+ c.AddRelocationInfo(ssa.FuncRef(i.u1), false)
// Note that this is zero as a placeholder for the call target if it's a function value.
c.Emit4Bytes(uint32(i.u2))
@@ -1244,6 +1244,37 @@ func (i *instruction) encode(c backend.Compiler) (needsLabelResolution bool) {
panic("BUG: invalid operand kind")
}
+ case tailCall:
+ // Encode as jmp.
+ c.EmitByte(0xe9)
+ // Meaning that the call target is a function value, and requires relocation.
+ c.AddRelocationInfo(ssa.FuncRef(i.u1), true)
+ // Note that this is zero as a placeholder for the call target if it's a function value.
+ c.Emit4Bytes(uint32(i.u2))
+
+ case tailCallIndirect:
+ op := i.op1
+
+ const opcodeNum = 1
+ const opcode = 0xff
+ const regMemSubOpcode = 4
+ rex := rexInfo(0).clearW()
+ switch op.kind {
+ // Indirect tail calls always take a register as the target.
+ // Note: the register should be a callee-saved register (usually r11).
+ case operandKindReg:
+ dst := regEncodings[op.reg().RealReg()]
+ encodeRegReg(c,
+ legacyPrefixesNone,
+ opcode, opcodeNum,
+ regMemSubOpcode,
+ dst,
+ rex,
+ )
+ default:
+ panic("BUG: invalid operand kind")
+ }
+
case xchg:
src, dst := regEncodings[i.op1.reg().RealReg()], i.op2
size := i.u1
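The tailCallIndirect case above emits opcode 0xff with sub-opcode /4, i.e. `jmp r/m64`, against the register that holds the target (r11). For reference, a hedged sketch of what that register form encodes to, independent of the helpers used in the diff:

package main

import "fmt"

// encodeJmpReg returns the bytes for `jmp r64` (FF /4 with a register operand):
// an optional REX.B prefix for r8-r15, opcode 0xFF, and a ModRM byte with
// mod=11, reg field = 4 (the /4 sub-opcode), rm = low 3 bits of the register.
func encodeJmpReg(regNum byte) []byte {
    out := []byte{}
    if regNum >= 8 {
        out = append(out, 0x41) // REX.B
    }
    modrm := byte(0xc0 | (4 << 3) | (regNum & 7))
    return append(out, 0xff, modrm)
}

func main() {
    fmt.Printf("% x\n", encodeJmpReg(11)) // r11 -> 41 ff e3
}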
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go
index fd0d69ca9..57d9bb731 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go
@@ -17,7 +17,7 @@ import (
// NewBackend returns a new backend for amd64.
func NewBackend() backend.Machine {
m := &machine{
- cpuFeatures: platform.CpuFeatures,
+ cpuFeatures: platform.CpuFeatures(),
regAlloc: regalloc.NewAllocator[*instruction, *labelPosition, *regAllocFn](regInfo),
spillSlots: map[regalloc.VRegID]int64{},
amodePool: wazevoapi.NewPool[amode](nil),
@@ -1109,6 +1109,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
atomicOp, size := instr.AtomicRmwData()
m.lowerAtomicRmw(atomicOp, addr, val, size, instr.Return())
+ case ssa.OpcodeTailCallReturnCall, ssa.OpcodeTailCallReturnCallIndirect:
+ m.lowerTailCall(instr)
+
default:
panic("TODO: lowering " + op.String())
}
@@ -1885,31 +1888,7 @@ func (m *machine) lowerStore(si *ssa.Instruction) {
func (m *machine) lowerCall(si *ssa.Instruction) {
isDirectCall := si.Opcode() == ssa.OpcodeCall
- var indirectCalleePtr ssa.Value
- var directCallee ssa.FuncRef
- var sigID ssa.SignatureID
- var args []ssa.Value
- var isMemmove bool
- if isDirectCall {
- directCallee, sigID, args = si.CallData()
- } else {
- indirectCalleePtr, sigID, args, isMemmove = si.CallIndirectData()
- }
- calleeABI := m.c.GetFunctionABI(m.c.SSABuilder().ResolveSignature(sigID))
-
- stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize())
- if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
- m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // 16 == return address + RBP.
- }
-
- // Note: See machine.SetupPrologue for the stack layout.
- // The stack pointer decrease/increase will be inserted later in the compilation.
-
- for i, arg := range args {
- reg := m.c.VRegOf(arg)
- def := m.c.ValueDefinition(arg)
- m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
- }
+ indirectCalleePtr, directCallee, isMemmove, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall)
if isMemmove {
// Go's memmove *might* use all xmm0-xmm15, so we need to release them.
@@ -1939,6 +1918,39 @@ func (m *machine) lowerCall(si *ssa.Instruction) {
m.insert(m.allocateInstr().asNopUseReg(regInfo.RealRegToVReg[rdx]))
}
+ m.insertReturns(si, calleeABI, stackSlotSize)
+}
+
+func (m *machine) prepareCall(si *ssa.Instruction, isDirectCall bool) (ssa.Value, ssa.FuncRef, bool, *backend.FunctionABI, int64) {
+ var indirectCalleePtr ssa.Value
+ var directCallee ssa.FuncRef
+ var sigID ssa.SignatureID
+ var args []ssa.Value
+ var isMemmove bool
+ if isDirectCall {
+ directCallee, sigID, args = si.CallData()
+ } else {
+ indirectCalleePtr, sigID, args, isMemmove = si.CallIndirectData()
+ }
+ calleeABI := m.c.GetFunctionABI(m.c.SSABuilder().ResolveSignature(sigID))
+
+ stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize())
+ if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
+ m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // 16 == return address + RBP.
+ }
+
+ // Note: See machine.SetupPrologue for the stack layout.
+ // The stack pointer decrease/increase will be inserted later in the compilation.
+
+ for i, arg := range args {
+ reg := m.c.VRegOf(arg)
+ def := m.c.ValueDefinition(arg)
+ m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
+ }
+ return indirectCalleePtr, directCallee, isMemmove, calleeABI, stackSlotSize
+}
+
+func (m *machine) insertReturns(si *ssa.Instruction, calleeABI *backend.FunctionABI, stackSlotSize int64) {
var index int
r1, rs := si.Returns()
if r1.Valid() {
@@ -1952,6 +1964,43 @@ func (m *machine) lowerCall(si *ssa.Instruction) {
}
}
+func (m *machine) lowerTailCall(si *ssa.Instruction) {
+ isDirectCall := si.Opcode() == ssa.OpcodeTailCallReturnCall
+ indirectCalleePtr, directCallee, isMemmove, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall)
+ if isMemmove {
+ panic("memmove not supported in tail calls")
+ }
+
+ isAllRegs := stackSlotSize == 0
+
+ switch {
+ case isDirectCall && isAllRegs:
+ call := m.allocateInstr().asTailCallReturnCall(directCallee, calleeABI)
+ m.insert(call)
+ case !isDirectCall && isAllRegs:
+ // In a tail call we insert the epilogue before the jump instruction,
+ // so any callee-saved register could be clobbered while the stack is restored.
+ // Therefore, unlike a regular indirect call, we ensure the pointer is kept
+ // in a caller-saved scratch register (r11).
+ // For details, see internal/engine/RATIONALE.md
+ ptrOp := m.getOperand_Reg(m.c.ValueDefinition(indirectCalleePtr))
+ tmpJmp := r11VReg
+ m.InsertMove(tmpJmp, ptrOp.reg(), ssa.TypeI64)
+ callInd := m.allocateInstr().asTailCallReturnCallIndirect(newOperandReg(tmpJmp), calleeABI)
+ m.insert(callInd)
+ case isDirectCall && !isAllRegs:
+ call := m.allocateInstr().asCall(directCallee, calleeABI)
+ m.insert(call)
+ case !isDirectCall && !isAllRegs:
+ ptrOp := m.getOperand_Mem_Reg(m.c.ValueDefinition(indirectCalleePtr))
+ callInd := m.allocateInstr().asCallIndirect(ptrOp, calleeABI)
+ m.insert(callInd)
+ }
+
+ // If this is a proper tail call, returns will be cleared in the postRegAlloc phase.
+ m.insertReturns(si, calleeABI, stackSlotSize)
+}
+
// callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the
// caller side of the function call.
func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def backend.SSAValueDefinition, stackSlotSize int64) {
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go
index e53729860..fa3ca58a6 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go
@@ -188,6 +188,23 @@ func (m *machine) postRegAlloc() {
linkInstr(inc, next)
}
continue
+ case tailCall, tailCallIndirect:
+ // At this point, reg alloc is done, so we can safely insert the RSP-decrementing instruction
+ // right before the tail call (jump) instruction. If this were done before reg alloc, a stack slot
+ // could point to the wrong location and therefore yield a wrong value.
+ tailCall := cur
+ _, _, _, _, size := backend.ABIInfoFromUint64(tailCall.u2)
+ if size > 0 {
+ dec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(size), rspVReg, true)
+ linkInstr(tailCall.prev, dec)
+ linkInstr(dec, tailCall)
+ }
+ // In a tail call, we insert the epilogue before the jump instruction.
+ m.setupEpilogueAfter(tailCall.prev)
+ // If this has been encoded as a proper tail call, we can remove the trailing instructions
+ // For details, see internal/engine/RATIONALE.md
+ m.removeUntilRet(cur.next)
+ continue
}
// Removes the redundant copy instruction.
@@ -278,6 +295,20 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {
linkInstr(cur, prevNext)
}
+// removeUntilRet removes the instructions starting from `cur` until the first `ret` instruction.
+func (m *machine) removeUntilRet(cur *instruction) {
+ for ; cur != nil; cur = cur.next {
+ prev, next := cur.prev, cur.next
+ prev.next = next
+ if next != nil {
+ next.prev = prev
+ }
+ if cur.kind == ret {
+ return
+ }
+ }
+}
+
func (m *machine) addRSP(offset int32, cur *instruction) *instruction {
if offset == 0 {
return cur
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go
index d1eaa7cd4..c300c3d61 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go
@@ -261,6 +261,23 @@ func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn reg
func (m *machine) lowerCall(si *ssa.Instruction) {
isDirectCall := si.Opcode() == ssa.OpcodeCall
+ indirectCalleePtr, directCallee, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall)
+
+ if isDirectCall {
+ call := m.allocateInstr()
+ call.asCall(directCallee, calleeABI)
+ m.insert(call)
+ } else {
+ ptr := m.compiler.VRegOf(indirectCalleePtr)
+ callInd := m.allocateInstr()
+ callInd.asCallIndirect(ptr, calleeABI)
+ m.insert(callInd)
+ }
+
+ m.insertReturns(si, calleeABI, stackSlotSize)
+}
+
+func (m *machine) prepareCall(si *ssa.Instruction, isDirectCall bool) (ssa.Value, ssa.FuncRef, *backend.FunctionABI, int64) {
var indirectCalleePtr ssa.Value
var directCallee ssa.FuncRef
var sigID ssa.SignatureID
@@ -282,18 +299,10 @@ func (m *machine) lowerCall(si *ssa.Instruction) {
def := m.compiler.ValueDefinition(arg)
m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
}
+ return indirectCalleePtr, directCallee, calleeABI, stackSlotSize
+}
- if isDirectCall {
- call := m.allocateInstr()
- call.asCall(directCallee, calleeABI)
- m.insert(call)
- } else {
- ptr := m.compiler.VRegOf(indirectCalleePtr)
- callInd := m.allocateInstr()
- callInd.asCallIndirect(ptr, calleeABI)
- m.insert(callInd)
- }
-
+func (m *machine) insertReturns(si *ssa.Instruction, calleeABI *backend.FunctionABI, stackSlotSize int64) {
var index int
r1, rs := si.Returns()
if r1.Valid() {
@@ -307,6 +316,40 @@ func (m *machine) lowerCall(si *ssa.Instruction) {
}
}
+func (m *machine) lowerTailCall(si *ssa.Instruction) {
+ isDirectCall := si.Opcode() == ssa.OpcodeTailCallReturnCall
+ indirectCalleePtr, directCallee, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall)
+
+ // We currently support tail calls only when the args are passed via registers;
+ // otherwise we fall back to a plain call.
+ // For details, see internal/engine/RATIONALE.md
+ isAllRegs := stackSlotSize == 0
+
+ switch {
+ case isDirectCall && isAllRegs:
+ tailJump := m.allocateInstr()
+ tailJump.asTailCall(directCallee, calleeABI)
+ m.insert(tailJump)
+ case !isDirectCall && isAllRegs:
+ ptr := m.compiler.VRegOf(indirectCalleePtr)
+ callInd := m.allocateInstr()
+ callInd.asTailCallIndirect(ptr, calleeABI)
+ m.insert(callInd)
+ case isDirectCall && !isAllRegs:
+ tailJump := m.allocateInstr()
+ tailJump.asCall(directCallee, calleeABI)
+ m.insert(tailJump)
+ case !isDirectCall && !isAllRegs:
+ ptr := m.compiler.VRegOf(indirectCalleePtr)
+ callInd := m.allocateInstr()
+ callInd.asCallIndirect(ptr, calleeABI)
+ m.insert(callInd)
+ }
+
+ // If this is a proper tail call, returns will be cleared in the postRegAlloc phase.
+ m.insertReturns(si, calleeABI, stackSlotSize)
+}
+
func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add bool) {
if imm12Operand, ok := asImm12Operand(uint64(diff)); ok {
alu := m.allocateInstr()
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go
index 1f563428a..560044673 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go
@@ -140,6 +140,8 @@ var defKinds = [numInstructionKinds]defKind{
atomicStore: defKindNone,
dmb: defKindNone,
loadConstBlockArg: defKindRD,
+ tailCall: defKindCall,
+ tailCallInd: defKindCall,
}
// Defs returns the list of regalloc.VReg that are defined by the instruction.
@@ -278,6 +280,8 @@ var useKinds = [numInstructionKinds]useKind{
atomicStore: useKindRNRM,
loadConstBlockArg: useKindNone,
dmb: useKindNone,
+ tailCall: useKindCall,
+ tailCallInd: useKindCallInd,
}
// Uses returns the list of regalloc.VReg that are used by the instruction.
@@ -1501,6 +1505,10 @@ func (i *instruction) String() (str string) {
str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64))
case dmb:
str = "dmb"
+ case tailCall:
+ str = fmt.Sprintf("b %s", ssa.FuncRef(i.u1))
+ case tailCallInd:
+ str = fmt.Sprintf("b %s", formatVRegSized(i.rn.nr(), 64))
case udf:
str = "udf"
case emitSourceOffsetInfo:
@@ -1550,6 +1558,22 @@ func (i *instruction) asDMB() {
i.kind = dmb
}
+func (i *instruction) asTailCall(ref ssa.FuncRef, abi *backend.FunctionABI) {
+ i.kind = tailCall
+ i.u1 = uint64(ref)
+ if abi != nil {
+ i.u2 = abi.ABIInfoAsUint64()
+ }
+}
+
+func (i *instruction) asTailCallIndirect(ptr regalloc.VReg, abi *backend.FunctionABI) {
+ i.kind = tailCallInd
+ i.rn = operandNR(ptr)
+ if abi != nil {
+ i.u2 = abi.ABIInfoAsUint64()
+ }
+}
+
// TODO: delete unnecessary things.
const (
// nop0 represents a no-op of zero size.
@@ -1727,6 +1751,10 @@ const (
atomicStore
// dmb represents the data memory barrier instruction in inner-shareable (ish) mode.
dmb
+ // tailCall represents a tail call instruction.
+ tailCall
+ // tailCallInd represents an indirect tail call instruction.
+ tailCallInd
// UDF is the undefined instruction. For debugging only.
udf
// loadConstBlockArg represents a load of a constant block argument.
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
index 21be9b71e..5326a5e28 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
@@ -39,7 +39,7 @@ func (i *instruction) encode(m *machine) {
c.Emit4Bytes(encodeUnconditionalBranch(false, imm))
case call:
// We still don't know the exact address of the function to call, so we emit a placeholder.
- c.AddRelocationInfo(i.callFuncRef())
+ c.AddRelocationInfo(i.callFuncRef(), false)
c.Emit4Bytes(encodeUnconditionalBranch(true, 0)) // 0 = placeholder
case callInd:
c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true))
@@ -417,6 +417,12 @@ func (i *instruction) encode(m *machine) {
))
case dmb:
c.Emit4Bytes(encodeDMB())
+ case tailCall:
+ // We still don't know the exact address of the function to call, so we emit a placeholder.
+ c.AddRelocationInfo(i.callFuncRef(), true) // true = IsTailCall
+ c.Emit4Bytes(encodeUnconditionalBranch(false, 0)) // 0 = placeholder
+ case tailCallInd:
+ c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], false))
default:
panic(i.String())
}
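On arm64 a direct tail call reuses the relocation machinery of a normal call but is emitted without the link bit (B instead of BL), and an indirect tail call uses BR instead of BLR, so the link register keeps holding the original caller's return address. A sketch of the standard A64 encodings involved (not wazero's encode helpers):

package main

import "fmt"

// encodeB encodes B (link=false) or BL (link=true) with a byte offset that
// must be a multiple of 4 within +/-128 MiB (a signed 26-bit word offset).
func encodeB(link bool, offset int64) uint32 {
    imm26 := uint32(offset/4) & 0x03ff_ffff
    if link {
        return 0x9400_0000 | imm26 // BL
    }
    return 0x1400_0000 | imm26 // B
}

// encodeBReg encodes BR (link=false) or BLR (link=true) to the given register.
func encodeBReg(link bool, reg uint32) uint32 {
    if link {
        return 0xd63f_0000 | reg<<5 // BLR Xn
    }
    return 0xd61f_0000 | reg<<5 // BR Xn
}

func main() {
    fmt.Printf("%08x\n", encodeB(false, 8))    // 14000002 = B #8
    fmt.Printf("%08x\n", encodeBReg(false, 9)) // d61f0120 = BR X9
}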
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
index f9df356c0..190bc6014 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
@@ -788,6 +788,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
instr.asDMB()
m.insert(instr)
+ case ssa.OpcodeTailCallReturnCall, ssa.OpcodeTailCallReturnCallIndirect:
+ m.lowerTailCall(instr)
+
default:
panic("TODO: lowering " + op.String())
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
index c646a8fab..16d0746e5 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
@@ -198,6 +198,11 @@ func (m *machine) postRegAlloc() {
switch cur.kind {
case ret:
m.setupEpilogueAfter(cur.prev)
+ case tailCall, tailCallInd:
+ m.setupEpilogueAfter(cur.prev)
+ // If this has been encoded as a proper tail call, we can remove the trailing instructions.
+ // For details, see internal/engine/RATIONALE.md
+ m.removeUntilRet(cur.next)
case loadConstBlockArg:
lc := cur
next := lc.next
@@ -325,6 +330,20 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {
linkInstr(cur, prevNext)
}
+// removeUntilRet removes the instructions starting from `cur` until the first `ret` instruction.
+func (m *machine) removeUntilRet(cur *instruction) {
+ for ; cur != nil; cur = cur.next {
+ prev, next := cur.prev, cur.next
+ prev.next = next
+ if next != nil {
+ next.prev = prev
+ }
+ if cur.kind == ret {
+ return
+ }
+ }
+}
+
// saveRequiredRegs is the set of registers that must be saved/restored during growing stack when there's insufficient
// stack space left. Basically this is the combination of CalleeSavedRegisters plus argument registers except for x0,
// which always points to the execution context whenever the native code is entered from Go.
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go
index 932fe842b..9bb4dee15 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go
@@ -59,13 +59,19 @@ func (m *machine) ResolveRelocations(
if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset {
// Find the near trampoline island from callTrampolineIslandOffsets.
islandOffset := searchTrampolineIsland(callTrampolineIslandOffsets, int(instrOffset))
- islandTargetOffset := islandOffset + trampolineCallSize*int(r.FuncRef)
+ // Imported functions don't need trampolines, so we ignore them when we compute the offset
+ // (see also encodeCallTrampolineIsland)
+ funcOffset := int(r.FuncRef) - importedFns
+ islandTargetOffset := islandOffset + trampolineCallSize*funcOffset
diff = int64(islandTargetOffset) - (instrOffset)
if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset {
panic("BUG in trampoline placement")
}
}
- binary.LittleEndian.PutUint32(executable[instrOffset:instrOffset+4], encodeUnconditionalBranch(true, diff))
+ // The unconditional branch instruction is usually encoded as a branch-and-link (BL),
+ // because it is a function call. However, if the instruction is a tail call,
+ // we encode it as a plain unconditional branch (B), so we won't overwrite the link register.
+ binary.LittleEndian.PutUint32(executable[instrOffset:instrOffset+4], encodeUnconditionalBranch(!r.IsTailCall, diff))
}
}
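ResolveRelocations only reroutes a call through a trampoline island when the direct branch displacement no longer fits in the 26-bit word offset of B/BL (roughly +/-128 MiB), and the island slot is indexed by the local function number, hence the importedFns subtraction from the FuncRef. A small sketch of the range check under those assumed limits:

package main

import "fmt"

const (
    // A64 unconditional branches carry a signed 26-bit word offset,
    // i.e. +/-128 MiB expressed in bytes.
    minBranchOffset = -(1 << 27)
    maxBranchOffset = 1<<27 - 4
)

// needsTrampoline reports whether a direct B/BL from instrOffset to
// targetOffset is out of range and must go through a trampoline island.
func needsTrampoline(instrOffset, targetOffset int64) bool {
    diff := targetOffset - instrOffset
    return diff < minBranchOffset || diff > maxBranchOffset
}

func main() {
    fmt.Println(needsTrampoline(0, 1<<20)) // false: 1 MiB is reachable
    fmt.Println(needsTrampoline(0, 1<<28)) // true: 256 MiB is not
}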
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine.go
index a6df3e7e7..a603dbdd7 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine.go
@@ -6,8 +6,10 @@ import (
"errors"
"fmt"
"runtime"
+ "slices"
"sort"
"sync"
+ "sync/atomic"
"unsafe"
"github.com/tetratelabs/wazero/api"
@@ -23,11 +25,15 @@ import (
)
type (
+ compiledModuleWithCount struct {
+ *compiledModule
+ refCount int
+ }
// engine implements wasm.Engine.
engine struct {
wazeroVersion string
fileCache filecache.Cache
- compiledModules map[wasm.ModuleID]*compiledModule
+ compiledModules map[wasm.ModuleID]*compiledModuleWithCount
// sortedCompiledModules is a list of compiled modules sorted by the initial address of the executable.
sortedCompiledModules []*compiledModule
mux sync.RWMutex
@@ -42,25 +48,32 @@ type (
}
sharedFunctions struct {
- // memoryGrowExecutable is a compiled trampoline executable for memory.grow builtin function.
- memoryGrowExecutable []byte
- // checkModuleExitCode is a compiled trampoline executable for checking module instance exit code. This
- // is used when ensureTermination is true.
- checkModuleExitCode []byte
- // stackGrowExecutable is a compiled executable for growing stack builtin function.
- stackGrowExecutable []byte
- // tableGrowExecutable is a compiled trampoline executable for table.grow builtin function.
- tableGrowExecutable []byte
- // refFuncExecutable is a compiled trampoline executable for ref.func builtin function.
- refFuncExecutable []byte
- // memoryWait32Executable is a compiled trampoline executable for memory.wait32 builtin function
- memoryWait32Executable []byte
- // memoryWait64Executable is a compiled trampoline executable for memory.wait64 builtin function
- memoryWait64Executable []byte
- // memoryNotifyExecutable is a compiled trampoline executable for memory.notify builtin function
- memoryNotifyExecutable []byte
- listenerBeforeTrampolines map[*wasm.FunctionType][]byte
- listenerAfterTrampolines map[*wasm.FunctionType][]byte
+ // The executable containing all compiled trampolines.
+ executable []byte
+ // memoryGrowAddress is the address of memory.grow builtin function.
+ memoryGrowAddress *byte
+ // checkModuleExitCodeAddress is the address of checking module instance exit code.
+ // This is used when ensureTermination is true.
+ checkModuleExitCodeAddress *byte
+ // stackGrowAddress is the address of growing stack builtin function.
+ stackGrowAddress *byte
+ // tableGrowAddress is the address of table.grow builtin function.
+ tableGrowAddress *byte
+ // refFuncAddress is the address of ref.func builtin function.
+ refFuncAddress *byte
+ // memoryWait32Address is the address of memory.wait32 builtin function
+ memoryWait32Address *byte
+ // memoryWait64Address is the address of memory.wait64 builtin function
+ memoryWait64Address *byte
+ // memoryNotifyAddress is the address of memory.notify builtin function
+ memoryNotifyAddress *byte
+ listenerTrampolines listenerTrampolines
+ }
+
+ listenerTrampolines = map[*wasm.FunctionType]struct {
+ executable []byte
+ before *byte
+ after *byte
}
// compiledModule is a compiled variant of a wasm.Module and ready to be used for instantiation.
@@ -83,8 +96,9 @@ type (
}
executables struct {
- executable []byte
- entryPreambles [][]byte
+ executable []byte
+ entryPreambles []byte
+ entryPreamblesPtrs []*byte
}
)
@@ -105,7 +119,7 @@ func NewEngine(ctx context.Context, _ api.CoreFeatures, fc filecache.Cache) wasm
machine := newMachine()
be := backend.NewCompiler(ctx, machine, ssa.NewBuilder())
e := &engine{
- compiledModules: make(map[wasm.ModuleID]*compiledModule),
+ compiledModules: make(map[wasm.ModuleID]*compiledModuleWithCount),
setFinalizer: runtime.SetFinalizer,
machine: machine,
be: be,
@@ -164,23 +178,46 @@ func (e *engine) CompileModule(ctx context.Context, module *wasm.Module, listene
}
func (exec *executables) compileEntryPreambles(m *wasm.Module, machine backend.Machine, be backend.Compiler) {
- exec.entryPreambles = make([][]byte, len(m.TypeSection))
- for i := range m.TypeSection {
+ if len(m.TypeSection) == 0 {
+ return
+ }
+
+ var preambles []byte
+ sizes := make([]int, len(m.TypeSection))
+
+ for i := range sizes {
typ := &m.TypeSection[i]
sig := frontend.SignatureForWasmFunctionType(typ)
be.Init()
buf := machine.CompileEntryPreamble(&sig)
- executable := mmapExecutable(buf)
- exec.entryPreambles[i] = executable
+ preambles = append(preambles, buf...)
+ align := 15 & -len(preambles) // Align to a 16-byte boundary.
+ preambles = append(preambles, make([]byte, align)...)
+ sizes[i] = len(buf) + align
+ }
+
+ exec.entryPreambles = mmapExecutable(preambles)
+ exec.entryPreamblesPtrs = make([]*byte, len(sizes))
+
+ offset := 0
+ for i, size := range sizes {
+ ptr := &exec.entryPreambles[offset]
+ exec.entryPreamblesPtrs[i] = ptr
+ offset += size
if wazevoapi.PerfMapEnabled {
- wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&executable[0])),
- uint64(len(executable)), fmt.Sprintf("entry_preamble::type=%s", typ.String()))
+ typ := &m.TypeSection[i]
+ wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(ptr)),
+ uint64(size), fmt.Sprintf("entry_preamble::type=%s", typ.String()))
}
}
}
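compileEntryPreambles now concatenates every per-type entry preamble into a single executable mapping and records one pointer per type, padding each preamble so the next one starts on a 16-byte boundary; the expression 15 & -len(preambles) is exactly the padding required for that. A tiny sketch of the arithmetic:

package main

import "fmt"

// pad16 returns how many bytes must be appended so that length becomes
// a multiple of 16: identical to `15 & -len` in the diff above.
func pad16(length int) int {
    return 15 & -length
}

func main() {
    for _, n := range []int{0, 1, 15, 16, 17, 100} {
        fmt.Printf("len=%3d pad=%2d aligned=%d\n", n, pad16(n), n+pad16(n))
    }
}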
func (e *engine) compileModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (*compiledModule, error) {
+ if module.IsHostModule {
+ return e.compileHostModule(ctx, module, listeners)
+ }
+
withListener := len(listeners) > 0
cm := &compiledModule{
offsets: wazevoapi.NewModuleContextOffsetData(module, withListener), parent: e, module: module,
@@ -188,116 +225,137 @@ func (e *engine) compileModule(ctx context.Context, module *wasm.Module, listene
executables: &executables{},
}
- if module.IsHostModule {
- return e.compileHostModule(ctx, module, listeners)
- }
-
importedFns, localFns := int(module.ImportFunctionCount), len(module.FunctionSection)
if localFns == 0 {
return cm, nil
}
- rels := make([]backend.RelocationInfo, 0)
- refToBinaryOffset := make([]int, importedFns+localFns)
-
- if wazevoapi.DeterministicCompilationVerifierEnabled {
- // The compilation must be deterministic regardless of the order of functions being compiled.
- wazevoapi.DeterministicCompilationVerifierRandomizeIndexes(ctx)
+ machine := newMachine()
+ relocator, err := newEngineRelocator(machine, importedFns, localFns)
+ if err != nil {
+ return nil, err
}
needSourceInfo := module.DWARFLines != nil
- // Creates new compiler instances which are reused for each function.
ssaBuilder := ssa.NewBuilder()
- fe := frontend.NewFrontendCompiler(module, ssaBuilder, &cm.offsets, ensureTermination, withListener, needSourceInfo)
- machine := newMachine()
be := backend.NewCompiler(ctx, machine, ssaBuilder)
-
cm.executables.compileEntryPreambles(module, machine, be)
-
- totalSize := 0 // Total binary size of the executable.
cm.functionOffsets = make([]int, localFns)
- bodies := make([][]byte, localFns)
- // Trampoline relocation related variables.
- trampolineInterval, callTrampolineIslandSize, err := machine.CallTrampolineIslandInfo(localFns)
- if err != nil {
- return nil, err
+ var indexes []int
+ if wazevoapi.DeterministicCompilationVerifierEnabled {
+ // The compilation must be deterministic regardless of the order of functions being compiled.
+ indexes = wazevoapi.DeterministicCompilationVerifierRandomizeIndexes(ctx)
}
- needCallTrampoline := callTrampolineIslandSize > 0
- var callTrampolineIslandOffsets []int // Holds the offsets of trampoline islands.
- for i := range module.CodeSection {
- if wazevoapi.DeterministicCompilationVerifierEnabled {
- i = wazevoapi.DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex(ctx, i)
- }
+ if workers := experimental.GetCompilationWorkers(ctx); workers <= 1 {
+ // Compile with a single goroutine.
+ fe := frontend.NewFrontendCompiler(module, ssaBuilder, &cm.offsets, ensureTermination, withListener, needSourceInfo)
- fidx := wasm.Index(i + importedFns)
-
- if wazevoapi.NeedFunctionNameInContext {
- def := module.FunctionDefinition(fidx)
- name := def.DebugName()
- if len(def.ExportNames()) > 0 {
- name = def.ExportNames()[0]
+ for i := range module.CodeSection {
+ if wazevoapi.DeterministicCompilationVerifierEnabled {
+ i = indexes[i]
}
- ctx = wazevoapi.SetCurrentFunctionName(ctx, i, fmt.Sprintf("[%d/%d]%s", i, len(module.CodeSection)-1, name))
- }
-
- needListener := len(listeners) > 0 && listeners[i] != nil
- body, relsPerFunc, err := e.compileLocalWasmFunction(ctx, module, wasm.Index(i), fe, ssaBuilder, be, needListener)
- if err != nil {
- return nil, fmt.Errorf("compile function %d/%d: %v", i, len(module.CodeSection)-1, err)
- }
- // Align 16-bytes boundary.
- totalSize = (totalSize + 15) &^ 15
- cm.functionOffsets[i] = totalSize
-
- if needSourceInfo {
- // At the beginning of the function, we add the offset of the function body so that
- // we can resolve the source location of the call site of before listener call.
- cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(totalSize))
- cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, module.CodeSection[i].BodyOffsetInCodeSection)
+ fidx := wasm.Index(i + importedFns)
+ fctx := functionContext(ctx, module, i, fidx)
- for _, info := range be.SourceOffsetInfo() {
- cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(totalSize)+uintptr(info.ExecutableOffset))
- cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, uint64(info.SourceOffset))
+ needListener := len(listeners) > i && listeners[i] != nil
+ body, relsPerFunc, err := e.compileLocalWasmFunction(fctx, module, wasm.Index(i), fe, ssaBuilder, be, needListener)
+ if err != nil {
+ return nil, fmt.Errorf("compile function %d/%d: %v", i, len(module.CodeSection)-1, err)
}
- }
- fref := frontend.FunctionIndexToFuncRef(fidx)
- refToBinaryOffset[fref] = totalSize
-
- // At this point, relocation offsets are relative to the start of the function body,
- // so we adjust it to the start of the executable.
- for _, r := range relsPerFunc {
- r.Offset += int64(totalSize)
- rels = append(rels, r)
+ relocator.appendFunction(fctx, module, cm, i, fidx, body, relsPerFunc, be.SourceOffsetInfo())
}
-
- bodies[i] = body
- totalSize += len(body)
- if wazevoapi.PrintMachineCodeHexPerFunction {
- fmt.Printf("[[[machine code for %s]]]\n%s\n\n", wazevoapi.GetCurrentFunctionName(ctx), hex.EncodeToString(body))
+ } else {
+ // Compile with N worker goroutines.
+ // Collect compiled functions across workers in a slice,
+	// to be added to the relocator in order and resolved serially at the end.
+ // This uses more memory and CPU (across cores), but can be significantly faster.
+ type compiledFunc struct {
+ fctx context.Context
+ fnum int
+ fidx wasm.Index
+ body []byte
+ relsPerFunc []backend.RelocationInfo
+ offsPerFunc []backend.SourceOffsetInfo
+ }
+
+ compiledFuncs := make([]compiledFunc, len(module.CodeSection))
+ ctx, cancel := context.WithCancelCause(ctx)
+ defer cancel(nil)
+
+ var count atomic.Uint32
+ var wg sync.WaitGroup
+ wg.Add(workers)
+
+ for range workers {
+ go func() {
+ defer wg.Done()
+
+			// Create new compiler instances, reused for each function compiled by this worker.
+ machine := newMachine()
+ ssaBuilder := ssa.NewBuilder()
+ be := backend.NewCompiler(ctx, machine, ssaBuilder)
+ fe := frontend.NewFrontendCompiler(module, ssaBuilder, &cm.offsets, ensureTermination, withListener, needSourceInfo)
+
+ for {
+ if err := ctx.Err(); err != nil {
+ // Compilation canceled!
+ return
+ }
+
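+				// Atomically claim the next function index; each worker keeps pulling indices until the code section is exhausted.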
+ i := int(count.Add(1)) - 1
+ if i >= len(module.CodeSection) {
+ return
+ }
+
+ if wazevoapi.DeterministicCompilationVerifierEnabled {
+ i = indexes[i]
+ }
+
+ fidx := wasm.Index(i + importedFns)
+ fctx := functionContext(ctx, module, i, fidx)
+
+ needListener := len(listeners) > i && listeners[i] != nil
+ body, relsPerFunc, err := e.compileLocalWasmFunction(fctx, module, wasm.Index(i), fe, ssaBuilder, be, needListener)
+ if err != nil {
+ cancel(fmt.Errorf("compile function %d/%d: %v", i, len(module.CodeSection)-1, err))
+ return
+ }
+
+ compiledFuncs[i] = compiledFunc{
+ fctx, i, fidx, body,
+					// These slices are internal to the backend compiler; since we are going to buffer them instead of
+					// processing them immediately, we need to copy the memory.
+ slices.Clone(relsPerFunc),
+ slices.Clone(be.SourceOffsetInfo()),
+ }
+ }
+ }()
+ }
+
+ wg.Wait()
+ if err := context.Cause(ctx); err != nil {
+ return nil, err
}
- if needCallTrampoline {
- // If the total size exceeds the trampoline interval, we need to add a trampoline island.
- if totalSize/trampolineInterval > len(callTrampolineIslandOffsets) {
- callTrampolineIslandOffsets = append(callTrampolineIslandOffsets, totalSize)
- totalSize += callTrampolineIslandSize
- }
+ for i := range compiledFuncs {
+ fn := &compiledFuncs[i]
+ relocator.appendFunction(fn.fctx, module, cm, fn.fnum, fn.fidx, fn.body, fn.relsPerFunc, fn.offsPerFunc)
}
}
// Allocate executable memory and then copy the generated machine code.
- executable, err := platform.MmapCodeSegment(totalSize)
+ executable, err := platform.MmapCodeSegment(relocator.totalSize)
if err != nil {
panic(err)
}
cm.executable = executable
- for i, b := range bodies {
+ for i, b := range relocator.bodies {
offset := cm.functionOffsets[i]
copy(executable[offset:], b)
}
@@ -312,22 +370,108 @@ func (e *engine) compileModule(ctx context.Context, module *wasm.Module, listene
}
}
- // Resolve relocations for local function calls.
- if len(rels) > 0 {
- machine.ResolveRelocations(refToBinaryOffset, importedFns, executable, rels, callTrampolineIslandOffsets)
- }
+ relocator.resolveRelocations(machine, executable, importedFns)
- if runtime.GOARCH == "arm64" {
- // On arm64, we cannot give all of rwx at the same time, so we change it to exec.
- if err = platform.MprotectRX(executable); err != nil {
- return nil, err
- }
+ if err = platform.MprotectRX(executable); err != nil {
+ return nil, err
}
cm.sharedFunctions = e.sharedFunctions
e.setFinalizer(cm.executables, executablesFinalizer)
return cm, nil
}
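+// functionContext returns ctx annotated with the current function's name (preferring its first export
+// name) when wazevoapi.NeedFunctionNameInContext is enabled; otherwise ctx is returned unchanged.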
+func functionContext(ctx context.Context, module *wasm.Module, fnum int, fidx wasm.Index) context.Context {
+ if wazevoapi.NeedFunctionNameInContext {
+ def := module.FunctionDefinition(fidx)
+ name := def.DebugName()
+ if len(def.ExportNames()) > 0 {
+ name = def.ExportNames()[0]
+ }
+ ctx = wazevoapi.SetCurrentFunctionName(ctx, fnum, fmt.Sprintf("[%d/%d]%s", fnum, len(module.CodeSection)-1, name))
+ }
+ return ctx
+}
+
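+// engineRelocator accumulates compiled function bodies, assigns each one a 16-byte aligned offset within
+// the final executable, and collects the relocations (and any call trampoline islands) to be resolved once
+// the machine code has been copied into executable memory.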
+type engineRelocator struct {
+ bodies [][]byte
+ refToBinaryOffset []int
+ rels []backend.RelocationInfo
+ totalSize int // Total binary size of the executable.
+ trampolineInterval int
+ callTrampolineIslandSize int
+ callTrampolineIslandOffsets []int // Holds the offsets of trampoline islands.
+}
+
+func newEngineRelocator(
+ machine backend.Machine,
+ importedFns, localFns int,
+) (r engineRelocator, err error) {
+ // Trampoline relocation related variables.
+ r.trampolineInterval, r.callTrampolineIslandSize, err = machine.CallTrampolineIslandInfo(localFns)
+ r.refToBinaryOffset = make([]int, importedFns+localFns)
+ r.bodies = make([][]byte, 0, localFns)
+ return
+}
+
+func (r *engineRelocator) resolveRelocations(machine backend.Machine, executable []byte, importedFns int) {
+ // Resolve relocations for local function calls.
+ if len(r.rels) > 0 {
+ machine.ResolveRelocations(r.refToBinaryOffset, importedFns, executable, r.rels, r.callTrampolineIslandOffsets)
+ }
+}
+
+func (r *engineRelocator) appendFunction(
+ ctx context.Context,
+ module *wasm.Module,
+ cm *compiledModule,
+ fnum int, fidx wasm.Index,
+ body []byte,
+ relsPerFunc []backend.RelocationInfo,
+ offsPerFunc []backend.SourceOffsetInfo,
+) {
+ // Align 16-bytes boundary.
+ r.totalSize = (r.totalSize + 15) &^ 15
+ cm.functionOffsets[fnum] = r.totalSize
+
+ needSourceInfo := module.DWARFLines != nil
+ if needSourceInfo {
+		// At the beginning of the function, we add the offset of the function body so that
+		// we can resolve the source location of the call site of the before-listener call.
+ cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(r.totalSize))
+ cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, module.CodeSection[fnum].BodyOffsetInCodeSection)
+
+ for _, info := range offsPerFunc {
+ cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(r.totalSize)+uintptr(info.ExecutableOffset))
+ cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, uint64(info.SourceOffset))
+ }
+ }
+
+ fref := frontend.FunctionIndexToFuncRef(fidx)
+ r.refToBinaryOffset[fref] = r.totalSize
+
+	// At this point, relocation offsets are relative to the start of the function body,
+	// so we adjust them to be relative to the start of the executable.
+ r.rels = slices.Grow(r.rels, len(relsPerFunc))
+ for _, rel := range relsPerFunc {
+ rel.Offset += int64(r.totalSize)
+ r.rels = append(r.rels, rel)
+ }
+
+ r.totalSize += len(body)
+ r.bodies = append(r.bodies, body)
+ if wazevoapi.PrintMachineCodeHexPerFunction {
+ fmt.Printf("[[[machine code for %s]]]\n%s\n\n", wazevoapi.GetCurrentFunctionName(ctx), hex.EncodeToString(body))
+ }
+
+ if r.callTrampolineIslandSize > 0 {
+ // If the total size exceeds the trampoline interval, we need to add a trampoline island.
+ if r.totalSize/r.trampolineInterval > len(r.callTrampolineIslandOffsets) {
+ r.callTrampolineIslandOffsets = append(r.callTrampolineIslandOffsets, r.totalSize)
+ r.totalSize += r.callTrampolineIslandSize
+ }
+ }
+}
+
func (e *engine) compileLocalWasmFunction(
ctx context.Context,
module *wasm.Module,
@@ -374,9 +518,7 @@ func (e *engine) compileLocalWasmFunction(
}
// TODO: optimize as zero copy.
- copied := make([]byte, len(original))
- copy(copied, original)
- return copied, rels, nil
+ return slices.Clone(original), rels, nil
}
func (e *engine) compileHostModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener) (*compiledModule, error) {
@@ -448,9 +590,7 @@ func (e *engine) compileHostModule(ctx context.Context, module *wasm.Module, lis
}
// TODO: optimize as zero copy.
- copied := make([]byte, len(body))
- copy(copied, body)
- bodies[i] = copied
+ bodies[i] = slices.Clone(body)
totalSize += len(body)
}
@@ -475,11 +615,8 @@ func (e *engine) compileHostModule(ctx context.Context, module *wasm.Module, lis
wazevoapi.PerfMap.Flush(uintptr(unsafe.Pointer(&executable[0])), cm.functionOffsets)
}
- if runtime.GOARCH == "arm64" {
- // On arm64, we cannot give all of rwx at the same time, so we change it to exec.
- if err = platform.MprotectRX(executable); err != nil {
- return nil, err
- }
+ if err = platform.MprotectRX(executable); err != nil {
+ return nil, err
}
e.setFinalizer(cm.executables, executablesFinalizer)
return cm, nil
@@ -507,12 +644,17 @@ func (e *engine) DeleteCompiledModule(m *wasm.Module) {
e.mux.Lock()
defer e.mux.Unlock()
cm, ok := e.compiledModules[m.ID]
- if ok {
- if len(cm.executable) > 0 {
- e.deleteCompiledModuleFromSortedList(cm)
- }
- delete(e.compiledModules, m.ID)
+ if !ok {
+ return
}
+ cm.refCount--
+ if cm.refCount > 0 {
+ return
+ }
+ if len(cm.executable) > 0 {
+ e.deleteCompiledModuleFromSortedList(cm.compiledModule)
+ }
+ delete(e.compiledModules, m.ID)
}
func (e *engine) addCompiledModuleToSortedList(cm *compiledModule) {
@@ -569,7 +711,7 @@ func (e *engine) NewModuleEngine(m *wasm.Module, mi *wasm.ModuleInstance) (wasm.
// Note: imported functions are resolved in moduleEngine.ResolveImportedFunction.
me.importedFunctions = make([]importedFunction, m.ImportFunctionCount)
- compiled, ok := e.getCompiledModuleFromMemory(m)
+ compiled, ok := e.getCompiledModuleFromMemory(m, false)
if !ok {
return nil, errors.New("source module must be compiled before instantiation")
}
@@ -591,167 +733,123 @@ func (e *engine) NewModuleEngine(m *wasm.Module, mi *wasm.ModuleInstance) (wasm.
}
func (e *engine) compileSharedFunctions() {
- e.sharedFunctions = &sharedFunctions{
- listenerBeforeTrampolines: make(map[*wasm.FunctionType][]byte),
- listenerAfterTrampolines: make(map[*wasm.FunctionType][]byte),
+ var sizes [8]int
+ var trampolines []byte
+
+ addTrampoline := func(i int, buf []byte) {
+ trampolines = append(trampolines, buf...)
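+		// 15 & -len yields the padding needed to round len up to the next multiple of 16.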
+ align := 15 & -len(trampolines) // Align 16-bytes boundary.
+ trampolines = append(trampolines, make([]byte, align)...)
+ sizes[i] = len(buf) + align
}
e.be.Init()
- {
- src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeGrowMemory, &ssa.Signature{
+ addTrampoline(0,
+ e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeGrowMemory, &ssa.Signature{
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32},
Results: []ssa.Type{ssa.TypeI32},
- }, false)
- e.sharedFunctions.memoryGrowExecutable = mmapExecutable(src)
- if wazevoapi.PerfMapEnabled {
- exe := e.sharedFunctions.memoryGrowExecutable
- wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_grow_trampoline")
- }
- }
+ }, false))
e.be.Init()
- {
- src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeTableGrow, &ssa.Signature{
+ addTrampoline(1,
+ e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeTableGrow, &ssa.Signature{
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* table index */, ssa.TypeI32 /* num */, ssa.TypeI64 /* ref */},
Results: []ssa.Type{ssa.TypeI32},
- }, false)
- e.sharedFunctions.tableGrowExecutable = mmapExecutable(src)
- if wazevoapi.PerfMapEnabled {
- exe := e.sharedFunctions.tableGrowExecutable
- wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "table_grow_trampoline")
- }
- }
+ }, false))
e.be.Init()
- {
- src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCheckModuleExitCode, &ssa.Signature{
+ addTrampoline(2,
+ e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCheckModuleExitCode, &ssa.Signature{
Params: []ssa.Type{ssa.TypeI32 /* exec context */},
Results: []ssa.Type{ssa.TypeI32},
- }, false)
- e.sharedFunctions.checkModuleExitCode = mmapExecutable(src)
- if wazevoapi.PerfMapEnabled {
- exe := e.sharedFunctions.checkModuleExitCode
- wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "check_module_exit_code_trampoline")
- }
- }
+ }, false))
e.be.Init()
- {
- src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeRefFunc, &ssa.Signature{
+ addTrampoline(3,
+ e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeRefFunc, &ssa.Signature{
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* function index */},
Results: []ssa.Type{ssa.TypeI64}, // returns the function reference.
- }, false)
- e.sharedFunctions.refFuncExecutable = mmapExecutable(src)
- if wazevoapi.PerfMapEnabled {
- exe := e.sharedFunctions.refFuncExecutable
- wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "ref_func_trampoline")
- }
- }
+ }, false))
e.be.Init()
- {
- src := e.machine.CompileStackGrowCallSequence()
- e.sharedFunctions.stackGrowExecutable = mmapExecutable(src)
- if wazevoapi.PerfMapEnabled {
- exe := e.sharedFunctions.stackGrowExecutable
- wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "stack_grow_trampoline")
- }
- }
+ addTrampoline(4, e.machine.CompileStackGrowCallSequence())
e.be.Init()
- {
- src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait32, &ssa.Signature{
+ addTrampoline(5,
+ e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait32, &ssa.Signature{
// exec context, timeout, expected, addr
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI32, ssa.TypeI64},
// Returns the status.
Results: []ssa.Type{ssa.TypeI32},
- }, false)
- e.sharedFunctions.memoryWait32Executable = mmapExecutable(src)
- if wazevoapi.PerfMapEnabled {
- exe := e.sharedFunctions.memoryWait32Executable
- wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_wait32_trampoline")
- }
- }
+ }, false))
e.be.Init()
- {
- src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait64, &ssa.Signature{
+ addTrampoline(6,
+ e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait64, &ssa.Signature{
// exec context, timeout, expected, addr
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64, ssa.TypeI64},
// Returns the status.
Results: []ssa.Type{ssa.TypeI32},
- }, false)
- e.sharedFunctions.memoryWait64Executable = mmapExecutable(src)
- if wazevoapi.PerfMapEnabled {
- exe := e.sharedFunctions.memoryWait64Executable
- wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_wait64_trampoline")
- }
- }
+ }, false))
e.be.Init()
- {
- src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryNotify, &ssa.Signature{
+ addTrampoline(7,
+ e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryNotify, &ssa.Signature{
// exec context, count, addr
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI32, ssa.TypeI64},
// Returns the number notified.
Results: []ssa.Type{ssa.TypeI32},
- }, false)
- e.sharedFunctions.memoryNotifyExecutable = mmapExecutable(src)
- if wazevoapi.PerfMapEnabled {
- exe := e.sharedFunctions.memoryNotifyExecutable
- wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_notify_trampoline")
- }
+ }, false))
+
+ fns := &sharedFunctions{
+ executable: mmapExecutable(trampolines),
+ listenerTrampolines: make(listenerTrampolines),
+ }
+ e.setFinalizer(fns, sharedFunctionsFinalizer)
+
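+	// Carve each trampoline's entry point out of the single mmapped region using the sizes recorded above.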
+ offset := 0
+ fns.memoryGrowAddress = &fns.executable[offset]
+ offset += sizes[0]
+ fns.tableGrowAddress = &fns.executable[offset]
+ offset += sizes[1]
+ fns.checkModuleExitCodeAddress = &fns.executable[offset]
+ offset += sizes[2]
+ fns.refFuncAddress = &fns.executable[offset]
+ offset += sizes[3]
+ fns.stackGrowAddress = &fns.executable[offset]
+ offset += sizes[4]
+ fns.memoryWait32Address = &fns.executable[offset]
+ offset += sizes[5]
+ fns.memoryWait64Address = &fns.executable[offset]
+ offset += sizes[6]
+ fns.memoryNotifyAddress = &fns.executable[offset]
+
+ if wazevoapi.PerfMapEnabled {
+ wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.memoryGrowAddress)), uint64(sizes[0]), "memory_grow_trampoline")
+ wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.tableGrowAddress)), uint64(sizes[1]), "table_grow_trampoline")
+ wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.checkModuleExitCodeAddress)), uint64(sizes[2]), "check_module_exit_code_trampoline")
+ wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.refFuncAddress)), uint64(sizes[3]), "ref_func_trampoline")
+ wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.stackGrowAddress)), uint64(sizes[4]), "stack_grow_trampoline")
+ wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.memoryWait32Address)), uint64(sizes[5]), "memory_wait32_trampoline")
+ wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.memoryWait64Address)), uint64(sizes[6]), "memory_wait64_trampoline")
+ wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(fns.memoryNotifyAddress)), uint64(sizes[7]), "memory_notify_trampoline")
}
- e.setFinalizer(e.sharedFunctions, sharedFunctionsFinalizer)
+ e.sharedFunctions = fns
}
func sharedFunctionsFinalizer(sf *sharedFunctions) {
- if err := platform.MunmapCodeSegment(sf.memoryGrowExecutable); err != nil {
- panic(err)
- }
- if err := platform.MunmapCodeSegment(sf.checkModuleExitCode); err != nil {
- panic(err)
- }
- if err := platform.MunmapCodeSegment(sf.stackGrowExecutable); err != nil {
- panic(err)
- }
- if err := platform.MunmapCodeSegment(sf.tableGrowExecutable); err != nil {
- panic(err)
- }
- if err := platform.MunmapCodeSegment(sf.refFuncExecutable); err != nil {
+ if err := platform.MunmapCodeSegment(sf.executable); err != nil {
panic(err)
}
- if err := platform.MunmapCodeSegment(sf.memoryWait32Executable); err != nil {
- panic(err)
- }
- if err := platform.MunmapCodeSegment(sf.memoryWait64Executable); err != nil {
- panic(err)
- }
- if err := platform.MunmapCodeSegment(sf.memoryNotifyExecutable); err != nil {
- panic(err)
- }
- for _, f := range sf.listenerBeforeTrampolines {
- if err := platform.MunmapCodeSegment(f); err != nil {
- panic(err)
- }
- }
- for _, f := range sf.listenerAfterTrampolines {
- if err := platform.MunmapCodeSegment(f); err != nil {
+ for _, f := range sf.listenerTrampolines {
+ if err := platform.MunmapCodeSegment(f.executable); err != nil {
panic(err)
}
}
- sf.memoryGrowExecutable = nil
- sf.checkModuleExitCode = nil
- sf.stackGrowExecutable = nil
- sf.tableGrowExecutable = nil
- sf.refFuncExecutable = nil
- sf.memoryWait32Executable = nil
- sf.memoryWait64Executable = nil
- sf.memoryNotifyExecutable = nil
- sf.listenerBeforeTrampolines = nil
- sf.listenerAfterTrampolines = nil
+ sf.executable = nil
+ sf.listenerTrampolines = nil
}
func executablesFinalizer(exec *executables) {
@@ -762,12 +860,13 @@ func executablesFinalizer(exec *executables) {
}
exec.executable = nil
- for _, f := range exec.entryPreambles {
- if err := platform.MunmapCodeSegment(f); err != nil {
+ if len(exec.entryPreambles) > 0 {
+ if err := platform.MunmapCodeSegment(exec.entryPreambles); err != nil {
panic(err)
}
}
exec.entryPreambles = nil
+ exec.entryPreamblesPtrs = nil
}
func mmapExecutable(src []byte) []byte {
@@ -778,11 +877,8 @@ func mmapExecutable(src []byte) []byte {
copy(executable, src)
- if runtime.GOARCH == "arm64" {
- // On arm64, we cannot give all of rwx at the same time, so we change it to exec.
- if err = platform.MprotectRX(executable); err != nil {
- panic(err)
- }
+ if err = platform.MprotectRX(executable); err != nil {
+ panic(err)
}
return executable
}
@@ -804,25 +900,30 @@ func (e *engine) getListenerTrampolineForType(functionType *wasm.FunctionType) (
e.mux.Lock()
defer e.mux.Unlock()
- beforeBuf, ok := e.sharedFunctions.listenerBeforeTrampolines[functionType]
- afterBuf := e.sharedFunctions.listenerAfterTrampolines[functionType]
- if ok {
- return &beforeBuf[0], &afterBuf[0]
- }
+ trampoline, ok := e.sharedFunctions.listenerTrampolines[functionType]
+ if !ok {
+ var executable []byte
+ beforeSig, afterSig := frontend.SignatureForListener(functionType)
- beforeSig, afterSig := frontend.SignatureForListener(functionType)
+ e.be.Init()
+ buf := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerBefore, beforeSig, false)
+ executable = append(executable, buf...)
- e.be.Init()
- buf := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerBefore, beforeSig, false)
- beforeBuf = mmapExecutable(buf)
+ align := 15 & -len(executable) // Align 16-bytes boundary.
+ executable = append(executable, make([]byte, align)...)
+ offset := len(executable)
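+		// offset marks where the "after" trampoline begins within the shared mapping; the "before" trampoline starts at 0.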
- e.be.Init()
- buf = e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerAfter, afterSig, false)
- afterBuf = mmapExecutable(buf)
+ e.be.Init()
+ buf = e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerAfter, afterSig, false)
+ executable = append(executable, buf...)
- e.sharedFunctions.listenerBeforeTrampolines[functionType] = beforeBuf
- e.sharedFunctions.listenerAfterTrampolines[functionType] = afterBuf
- return &beforeBuf[0], &afterBuf[0]
+ trampoline.executable = mmapExecutable(executable)
+ trampoline.before = &trampoline.executable[0]
+ trampoline.after = &trampoline.executable[offset]
+
+ e.sharedFunctions.listenerTrampolines[functionType] = trampoline
+ }
+ return trampoline.before, trampoline.after
}
func (cm *compiledModule) getSourceOffset(pc uintptr) uint64 {
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go
index e49353dc8..e0446e08a 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go
@@ -8,7 +8,6 @@ import (
"fmt"
"hash/crc32"
"io"
- "runtime"
"unsafe"
"github.com/tetratelabs/wazero/experimental"
@@ -33,7 +32,7 @@ func fileCacheKey(m *wasm.Module) (ret filecache.Key) {
s.Write(magic)
// Write the CPU features so that we can cache the compiled module for the same CPU.
// This prevents the incompatible CPU features from being used.
- cpu := platform.CpuFeatures.Raw()
+ cpu := platform.CpuFeatures().Raw()
// Reuse the `ret` buffer to write the first 8 bytes of the CPU features so that we can avoid the allocation.
binary.LittleEndian.PutUint64(ret[:8], cpu)
s.Write(ret[:8])
@@ -51,7 +50,7 @@ func (e *engine) addCompiledModule(module *wasm.Module, cm *compiledModule) (err
}
func (e *engine) getCompiledModule(module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (cm *compiledModule, ok bool, err error) {
- cm, ok = e.getCompiledModuleFromMemory(module)
+ cm, ok = e.getCompiledModuleFromMemory(module, true)
if ok {
return
}
@@ -88,16 +87,23 @@ func (e *engine) getCompiledModule(module *wasm.Module, listeners []experimental
func (e *engine) addCompiledModuleToMemory(m *wasm.Module, cm *compiledModule) {
e.mux.Lock()
defer e.mux.Unlock()
- e.compiledModules[m.ID] = cm
+ e.compiledModules[m.ID] = &compiledModuleWithCount{compiledModule: cm, refCount: 1}
if len(cm.executable) > 0 {
e.addCompiledModuleToSortedList(cm)
}
}
-func (e *engine) getCompiledModuleFromMemory(module *wasm.Module) (cm *compiledModule, ok bool) {
- e.mux.RLock()
- defer e.mux.RUnlock()
- cm, ok = e.compiledModules[module.ID]
+func (e *engine) getCompiledModuleFromMemory(module *wasm.Module, increaseRefCount bool) (cm *compiledModule, ok bool) {
+ e.mux.Lock()
+ defer e.mux.Unlock()
+
+ cmWithCount, ok := e.compiledModules[module.ID]
+ if ok {
+ cm = cmWithCount.compiledModule
+ if increaseRefCount {
+ cmWithCount.refCount++
+ }
+ }
return
}
@@ -246,11 +252,8 @@ func deserializeCompiledModule(wazeroVersion string, reader io.ReadCloser) (cm *
return nil, false, fmt.Errorf("compilationcache: checksum mismatch (expected %d, got %d)", expected, checksum)
}
- if runtime.GOARCH == "arm64" {
- // On arm64, we cannot give all of rwx at the same time, so we change it to exec.
- if err = platform.MprotectRX(executable); err != nil {
- return nil, false, err
- }
+ if err = platform.MprotectRX(executable); err != nil {
+ return nil, false, err
}
cm.executable = executable
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go
index eebdba034..5749e03c7 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go
@@ -469,7 +469,7 @@ func (c *Compiler) allocateVarLengthValues(_cap int, vs ...ssa.Value) ssa.Values
builder := c.ssaBuilder
pool := builder.VarLengthPool()
args := pool.Allocate(_cap)
- args = args.Append(builder.VarLengthPool(), vs...)
+ args = args.Append(pool, vs...)
return args
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go
index e73debbd1..1277db0bf 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go
@@ -123,8 +123,7 @@ func (c *Compiler) nPeekDup(n int) ssa.Values {
l := c.state()
tail := len(l.values)
- args := c.allocateVarLengthValues(n)
- args = args.Append(c.ssaBuilder.VarLengthPool(), l.values[tail-n:tail]...)
+ args := c.allocateVarLengthValues(n, l.values[tail-n:tail]...)
return args
}
@@ -665,19 +664,22 @@ func (c *Compiler) lowerCurrentOpcode() {
tableBaseAddr := c.loadTableBaseAddr(tableInstancePtr)
addr := builder.AllocateInstruction().AsIadd(tableBaseAddr, offsetInBytes).Insert(builder).Return()
- // Prepare the loop and following block.
- beforeLoop := builder.AllocateBasicBlock()
- loopBlk := builder.AllocateBasicBlock()
- loopVar := loopBlk.AddParam(builder, ssa.TypeI64)
- followingBlk := builder.AllocateBasicBlock()
-
// Uses the copy trick for faster filling buffer like memory.fill, but in this case we copy 8 bytes at a time.
+ // Tables are rarely huge, so ignore the 8KB maximum.
+ // https://github.com/golang/go/blob/go1.24.0/src/slices/slices.go#L514-L517
+ //
// buf := memoryInst.Buffer[offset : offset+fillSize]
// buf[0:8] = value
// for i := 8; i < fillSize; i *= 2 { Begin with 8 bytes.
// copy(buf[i:], buf[:i])
// }
+ // Prepare the loop and following block.
+ beforeLoop := builder.AllocateBasicBlock()
+ loopBlk := builder.AllocateBasicBlock()
+ loopVar := loopBlk.AddParam(builder, ssa.TypeI64)
+ followingBlk := builder.AllocateBasicBlock()
+
// Insert the jump to the beforeLoop block; If the fillSize is zero, then jump to the following block to skip entire logics.
zero := builder.AllocateInstruction().AsIconst64(0).Insert(builder).Return()
ifFillSizeZero := builder.AllocateInstruction().AsIcmp(fillSizeExt, zero, ssa.IntegerCmpCondEqual).
@@ -688,32 +690,24 @@ func (c *Compiler) lowerCurrentOpcode() {
// buf[0:8] = value
builder.SetCurrentBlock(beforeLoop)
builder.AllocateInstruction().AsStore(ssa.OpcodeStore, value, addr, 0).Insert(builder)
- initValue := builder.AllocateInstruction().AsIconst64(8).Insert(builder).Return()
- c.insertJumpToBlock(c.allocateVarLengthValues(1, initValue), loopBlk)
+ eight := builder.AllocateInstruction().AsIconst64(8).Insert(builder).Return()
+ c.insertJumpToBlock(c.allocateVarLengthValues(1, eight), loopBlk)
builder.SetCurrentBlock(loopBlk)
dstAddr := builder.AllocateInstruction().AsIadd(addr, loopVar).Insert(builder).Return()
- // If loopVar*2 > fillSizeInBytes, then count must be fillSizeInBytes-loopVar.
- var count ssa.Value
- {
- loopVarDoubled := builder.AllocateInstruction().AsIadd(loopVar, loopVar).Insert(builder).Return()
- loopVarDoubledLargerThanFillSize := builder.
- AllocateInstruction().AsIcmp(loopVarDoubled, fillSizeInBytes, ssa.IntegerCmpCondUnsignedGreaterThanOrEqual).
- Insert(builder).Return()
- diff := builder.AllocateInstruction().AsIsub(fillSizeInBytes, loopVar).Insert(builder).Return()
- count = builder.AllocateInstruction().AsSelect(loopVarDoubledLargerThanFillSize, diff, loopVar).Insert(builder).Return()
- }
+ newLoopVar := builder.AllocateInstruction().AsIadd(loopVar, loopVar).Insert(builder).Return()
+ newLoopVarLessThanFillSize := builder.AllocateInstruction().
+ AsIcmp(newLoopVar, fillSizeInBytes, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return()
- c.callMemmove(dstAddr, addr, count)
+ // On the last iteration, count must be fillSizeInBytes-loopVar.
+ diff := builder.AllocateInstruction().AsIsub(fillSizeInBytes, loopVar).Insert(builder).Return()
+ count := builder.AllocateInstruction().AsSelect(newLoopVarLessThanFillSize, loopVar, diff).Insert(builder).Return()
- shiftAmount := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return()
- newLoopVar := builder.AllocateInstruction().AsIshl(loopVar, shiftAmount).Insert(builder).Return()
- loopVarLessThanFillSize := builder.AllocateInstruction().
- AsIcmp(newLoopVar, fillSizeInBytes, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return()
+ c.callMemmove(dstAddr, addr, count)
builder.AllocateInstruction().
- AsBrnz(loopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk).
+ AsBrnz(newLoopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk).
Insert(builder)
c.insertJumpToBlock(ssa.ValuesNil, followingBlk)
@@ -741,11 +735,15 @@ func (c *Compiler) lowerCurrentOpcode() {
// Calculate the base address:
addr := builder.AllocateInstruction().AsIadd(c.getMemoryBaseValue(false), offset).Insert(builder).Return()
- // Uses the copy trick for faster filling buffer: https://gist.github.com/taylorza/df2f89d5f9ab3ffd06865062a4cf015d
+		// Uses the copy trick to fill the buffer faster, with a maximum chunk size of 8KB.
+ // https://github.com/golang/go/blob/go1.24.0/src/bytes/bytes.go#L664-L673
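+		// The copied chunk doubles each iteration until it reaches 8KB, after which the fill proceeds in fixed 8KB steps.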
+ //
// buf := memoryInst.Buffer[offset : offset+fillSize]
// buf[0] = value
- // for i := 1; i < fillSize; i *= 2 {
- // copy(buf[i:], buf[:i])
+ // for i := 1; i < fillSize; {
+ // chunk := ((i - 1) & 8191) + 1
+ // copy(buf[i:], buf[:chunk])
+ // i += chunk
// }
// Prepare the loop and following block.
@@ -764,32 +762,31 @@ func (c *Compiler) lowerCurrentOpcode() {
// buf[0] = value
builder.SetCurrentBlock(beforeLoop)
builder.AllocateInstruction().AsStore(ssa.OpcodeIstore8, value, addr, 0).Insert(builder)
- initValue := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return()
- c.insertJumpToBlock(c.allocateVarLengthValues(1, initValue), loopBlk)
+ one := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return()
+ c.insertJumpToBlock(c.allocateVarLengthValues(1, one), loopBlk)
builder.SetCurrentBlock(loopBlk)
dstAddr := builder.AllocateInstruction().AsIadd(addr, loopVar).Insert(builder).Return()
- // If loopVar*2 > fillSizeExt, then count must be fillSizeExt-loopVar.
- var count ssa.Value
- {
- loopVarDoubled := builder.AllocateInstruction().AsIadd(loopVar, loopVar).Insert(builder).Return()
- loopVarDoubledLargerThanFillSize := builder.
- AllocateInstruction().AsIcmp(loopVarDoubled, fillSize, ssa.IntegerCmpCondUnsignedGreaterThanOrEqual).
- Insert(builder).Return()
- diff := builder.AllocateInstruction().AsIsub(fillSize, loopVar).Insert(builder).Return()
- count = builder.AllocateInstruction().AsSelect(loopVarDoubledLargerThanFillSize, diff, loopVar).Insert(builder).Return()
- }
-
- c.callMemmove(dstAddr, addr, count)
+ // chunk := ((i - 1) & 8191) + 1
+ mask := builder.AllocateInstruction().AsIconst64(8191).Insert(builder).Return()
+ tmp1 := builder.AllocateInstruction().AsIsub(loopVar, one).Insert(builder).Return()
+ tmp2 := builder.AllocateInstruction().AsBand(tmp1, mask).Insert(builder).Return()
+ chunk := builder.AllocateInstruction().AsIadd(tmp2, one).Insert(builder).Return()
- shiftAmount := builder.AllocateInstruction().AsIconst64(1).Insert(builder).Return()
- newLoopVar := builder.AllocateInstruction().AsIshl(loopVar, shiftAmount).Insert(builder).Return()
- loopVarLessThanFillSize := builder.AllocateInstruction().
+ // i += chunk
+ newLoopVar := builder.AllocateInstruction().AsIadd(loopVar, chunk).Insert(builder).Return()
+ newLoopVarLessThanFillSize := builder.AllocateInstruction().
AsIcmp(newLoopVar, fillSize, ssa.IntegerCmpCondUnsignedLessThan).Insert(builder).Return()
+ // count = min(chunk, fillSize-loopVar)
+ diff := builder.AllocateInstruction().AsIsub(fillSize, loopVar).Insert(builder).Return()
+ count := builder.AllocateInstruction().AsSelect(newLoopVarLessThanFillSize, chunk, diff).Insert(builder).Return()
+
+ c.callMemmove(dstAddr, addr, count)
+
builder.AllocateInstruction().
- AsBrnz(loopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk).
+ AsBrnz(newLoopVarLessThanFillSize, c.allocateVarLengthValues(1, newLoopVar), loopBlk).
Insert(builder)
c.insertJumpToBlock(ssa.ValuesNil, followingBlk)
@@ -1173,7 +1170,7 @@ func (c *Compiler) lowerCurrentOpcode() {
ssa.TypeI64,
).Insert(builder).Return()
- args := c.allocateVarLengthValues(1, c.execCtxPtrValue, pages)
+ args := c.allocateVarLengthValues(2, c.execCtxPtrValue, pages)
callGrowRet := builder.
AllocateInstruction().
AsCallIndirect(memoryGrowPtr, &c.memoryGrowSig, args).
@@ -1343,8 +1340,7 @@ func (c *Compiler) lowerCurrentOpcode() {
blockType: bt,
})
- args := c.allocateVarLengthValues(originalLen)
- args = args.Append(builder.VarLengthPool(), state.values[originalLen:]...)
+ args := c.allocateVarLengthValues(len(bt.Params), state.values[originalLen:]...)
// Insert the jump to the header of loop.
br := builder.AllocateInstruction()
@@ -1383,8 +1379,7 @@ func (c *Compiler) lowerCurrentOpcode() {
// multiple definitions (one in Then and another in Else blocks).
c.addBlockParamsFromWasmTypes(bt.Results, followingBlk)
- args := c.allocateVarLengthValues(len(bt.Params))
- args = args.Append(builder.VarLengthPool(), state.values[len(state.values)-len(bt.Params):]...)
+ args := c.allocateVarLengthValues(len(bt.Params), state.values[len(state.values)-len(bt.Params):]...)
// Insert the conditional jump to the Else block.
brz := builder.AllocateInstruction()
@@ -1568,11 +1563,7 @@ func (c *Compiler) lowerCurrentOpcode() {
c.callListenerAfter()
}
- results := c.nPeekDup(c.results())
- instr := builder.AllocateInstruction()
-
- instr.AsReturn(results)
- builder.InsertInstruction(instr)
+ c.lowerReturn(builder)
state.unreachable = true
case wasm.OpcodeUnreachable:
@@ -1597,66 +1588,7 @@ func (c *Compiler) lowerCurrentOpcode() {
if state.unreachable {
break
}
-
- var typIndex wasm.Index
- if fnIndex < c.m.ImportFunctionCount {
- // Before transfer the control to the callee, we have to store the current module's moduleContextPtr
- // into execContext.callerModuleContextPtr in case when the callee is a Go function.
- c.storeCallerModuleContext()
- var fi int
- for i := range c.m.ImportSection {
- imp := &c.m.ImportSection[i]
- if imp.Type == wasm.ExternTypeFunc {
- if fi == int(fnIndex) {
- typIndex = imp.DescFunc
- break
- }
- fi++
- }
- }
- } else {
- typIndex = c.m.FunctionSection[fnIndex-c.m.ImportFunctionCount]
- }
- typ := &c.m.TypeSection[typIndex]
-
- argN := len(typ.Params)
- tail := len(state.values) - argN
- vs := state.values[tail:]
- state.values = state.values[:tail]
- args := c.allocateVarLengthValues(2+len(vs), c.execCtxPtrValue)
-
- sig := c.signatures[typ]
- call := builder.AllocateInstruction()
- if fnIndex >= c.m.ImportFunctionCount {
- args = args.Append(builder.VarLengthPool(), c.moduleCtxPtrValue) // This case the callee module is itself.
- args = args.Append(builder.VarLengthPool(), vs...)
- call.AsCall(FunctionIndexToFuncRef(fnIndex), sig, args)
- builder.InsertInstruction(call)
- } else {
- // This case we have to read the address of the imported function from the module context.
- moduleCtx := c.moduleCtxPtrValue
- loadFuncPtr, loadModuleCtxPtr := builder.AllocateInstruction(), builder.AllocateInstruction()
- funcPtrOffset, moduleCtxPtrOffset, _ := c.offset.ImportedFunctionOffset(fnIndex)
- loadFuncPtr.AsLoad(moduleCtx, funcPtrOffset.U32(), ssa.TypeI64)
- loadModuleCtxPtr.AsLoad(moduleCtx, moduleCtxPtrOffset.U32(), ssa.TypeI64)
- builder.InsertInstruction(loadFuncPtr)
- builder.InsertInstruction(loadModuleCtxPtr)
-
- args = args.Append(builder.VarLengthPool(), loadModuleCtxPtr.Return())
- args = args.Append(builder.VarLengthPool(), vs...)
- call.AsCallIndirect(loadFuncPtr.Return(), sig, args)
- builder.InsertInstruction(call)
- }
-
- first, rest := call.Returns()
- if first.Valid() {
- state.push(first)
- }
- for _, v := range rest {
- state.push(v)
- }
-
- c.reloadAfterCall()
+ c.lowerCall(fnIndex)
case wasm.OpcodeDrop:
if state.unreachable {
@@ -3190,7 +3122,7 @@ func (c *Compiler) lowerCurrentOpcode() {
ssa.TypeI64,
).Insert(builder).Return()
- args := c.allocateVarLengthValues(3, c.execCtxPtrValue, timeout, exp, addr)
+ args := c.allocateVarLengthValues(4, c.execCtxPtrValue, timeout, exp, addr)
memoryWaitRet := builder.AllocateInstruction().
AsCallIndirect(memoryWaitPtr, sig, args).
Insert(builder).Return()
@@ -3211,7 +3143,7 @@ func (c *Compiler) lowerCurrentOpcode() {
wazevoapi.ExecutionContextOffsetMemoryNotifyTrampolineAddress.U32(),
ssa.TypeI64,
).Insert(builder).Return()
- args := c.allocateVarLengthValues(2, c.execCtxPtrValue, count, addr)
+ args := c.allocateVarLengthValues(3, c.execCtxPtrValue, count, addr)
memoryNotifyRet := builder.AllocateInstruction().
AsCallIndirect(memoryNotifyPtr, &c.memoryNotifySig, args).
Insert(builder).Return()
@@ -3460,6 +3392,25 @@ func (c *Compiler) lowerCurrentOpcode() {
elementAddr := c.lowerAccessTableWithBoundsCheck(tableIndex, targetOffsetInTable)
loaded := builder.AllocateInstruction().AsLoad(elementAddr, 0, ssa.TypeI64).Insert(builder).Return()
state.push(loaded)
+
+ case wasm.OpcodeTailCallReturnCallIndirect:
+ typeIndex := c.readI32u()
+ tableIndex := c.readI32u()
+ if state.unreachable {
+ break
+ }
+ c.lowerTailCallReturnCallIndirect(typeIndex, tableIndex)
+ state.unreachable = true
+
+ case wasm.OpcodeTailCallReturnCall:
+ fnIndex := c.readI32u()
+ if state.unreachable {
+ break
+ }
+ c.lowerTailCallReturnCall(fnIndex)
+ state.unreachable = true
+
default:
panic("TODO: unsupported in wazevo yet: " + wasm.InstructionName(op))
}
@@ -3473,6 +3424,14 @@ func (c *Compiler) lowerCurrentOpcode() {
c.loweringState.pc++
}
+func (c *Compiler) lowerReturn(builder ssa.Builder) {
+ results := c.nPeekDup(c.results())
+ instr := builder.AllocateInstruction()
+
+ instr.AsReturn(results)
+ builder.InsertInstruction(instr)
+}
+
func (c *Compiler) lowerExtMul(v1, v2 ssa.Value, from, to ssa.VecLane, signed, low bool) ssa.Value {
// TODO: The sequence `Widen; Widen; VIMul` can be substituted for a single instruction on some ISAs.
builder := c.ssaBuilder
@@ -3533,7 +3492,83 @@ func (c *Compiler) lowerAccessTableWithBoundsCheck(tableIndex uint32, elementOff
return calcElementAddressInTable.Return()
}
-func (c *Compiler) lowerCallIndirect(typeIndex, tableIndex uint32) {
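+// prepareCall pops the callee's arguments off the value stack and assembles the full argument list
+// (exec context, callee module context, then the Wasm arguments). For a locally defined function it returns
+// isIndirect=false and the callee's FuncRef in funcRefOrPtrValue; for an imported function it returns
+// isIndirect=true and the loaded function pointer (an ssa.Value) instead.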
+func (c *Compiler) prepareCall(fnIndex uint32) (isIndirect bool, sig *ssa.Signature, args ssa.Values, funcRefOrPtrValue uint64) {
+ builder := c.ssaBuilder
+ state := c.state()
+ var typIndex wasm.Index
+ if fnIndex < c.m.ImportFunctionCount {
+		// Before transferring control to the callee, we have to store the current module's moduleContextPtr
+		// into execContext.callerModuleContextPtr in case the callee is a Go function.
+ c.storeCallerModuleContext()
+ var fi int
+ for i := range c.m.ImportSection {
+ imp := &c.m.ImportSection[i]
+ if imp.Type == wasm.ExternTypeFunc {
+ if fi == int(fnIndex) {
+ typIndex = imp.DescFunc
+ break
+ }
+ fi++
+ }
+ }
+ } else {
+ typIndex = c.m.FunctionSection[fnIndex-c.m.ImportFunctionCount]
+ }
+ typ := &c.m.TypeSection[typIndex]
+
+ argN := len(typ.Params)
+ tail := len(state.values) - argN
+ vs := state.values[tail:]
+ state.values = state.values[:tail]
+ args = c.allocateVarLengthValues(2+len(vs), c.execCtxPtrValue)
+
+ sig = c.signatures[typ]
+ if fnIndex >= c.m.ImportFunctionCount {
+		args = args.Append(builder.VarLengthPool(), c.moduleCtxPtrValue) // In this case, the callee module is the current module itself.
+ args = args.Append(builder.VarLengthPool(), vs...)
+ return false, sig, args, uint64(FunctionIndexToFuncRef(fnIndex))
+ } else {
+		// In this case, we have to read the address of the imported function from the module context.
+ moduleCtx := c.moduleCtxPtrValue
+ loadFuncPtr, loadModuleCtxPtr := builder.AllocateInstruction(), builder.AllocateInstruction()
+ funcPtrOffset, moduleCtxPtrOffset, _ := c.offset.ImportedFunctionOffset(fnIndex)
+ loadFuncPtr.AsLoad(moduleCtx, funcPtrOffset.U32(), ssa.TypeI64)
+ loadModuleCtxPtr.AsLoad(moduleCtx, moduleCtxPtrOffset.U32(), ssa.TypeI64)
+ builder.InsertInstruction(loadFuncPtr)
+ builder.InsertInstruction(loadModuleCtxPtr)
+
+ args = args.Append(builder.VarLengthPool(), loadModuleCtxPtr.Return())
+ args = args.Append(builder.VarLengthPool(), vs...)
+
+ return true, sig, args, uint64(loadFuncPtr.Return())
+ }
+}
+
+func (c *Compiler) lowerCall(fnIndex uint32) {
+ builder := c.ssaBuilder
+ state := c.state()
+ isIndirect, sig, args, funcRefOrPtrValue := c.prepareCall(fnIndex)
+
+ call := builder.AllocateInstruction()
+ if isIndirect {
+ call.AsCallIndirect(ssa.Value(funcRefOrPtrValue), sig, args)
+ } else {
+ call.AsCall(ssa.FuncRef(funcRefOrPtrValue), sig, args)
+ }
+ builder.InsertInstruction(call)
+
+ first, rest := call.Returns()
+ if first.Valid() {
+ state.push(first)
+ }
+ for _, v := range rest {
+ state.push(v)
+ }
+
+ c.reloadAfterCall()
+}
+
+func (c *Compiler) prepareCallIndirect(typeIndex, tableIndex uint32) (ssa.Value, *wasm.FunctionType, ssa.Values) {
builder := c.ssaBuilder
state := c.state()
@@ -3601,6 +3636,14 @@ func (c *Compiler) lowerCallIndirect(typeIndex, tableIndex uint32) {
// into execContext.callerModuleContextPtr in case when the callee is a Go function.
c.storeCallerModuleContext()
+ return executablePtr, typ, args
+}
+
+func (c *Compiler) lowerCallIndirect(typeIndex, tableIndex uint32) {
+ builder := c.ssaBuilder
+ state := c.state()
+ executablePtr, typ, args := c.prepareCallIndirect(typeIndex, tableIndex)
+
call := builder.AllocateInstruction()
call.AsCallIndirect(executablePtr, c.signatures[typ], args)
builder.InsertInstruction(call)
@@ -3616,6 +3659,62 @@ func (c *Compiler) lowerCallIndirect(typeIndex, tableIndex uint32) {
c.reloadAfterCall()
}
+func (c *Compiler) lowerTailCallReturnCall(fnIndex uint32) {
+ isIndirect, sig, args, funcRefOrPtrValue := c.prepareCall(fnIndex)
+ builder := c.ssaBuilder
+ state := c.state()
+
+ call := builder.AllocateInstruction()
+ if isIndirect {
+ call.AsTailCallReturnCallIndirect(ssa.Value(funcRefOrPtrValue), sig, args)
+ } else {
+ call.AsTailCallReturnCall(ssa.FuncRef(funcRefOrPtrValue), sig, args)
+ }
+ builder.InsertInstruction(call)
+
+ // In a proper tail call, the following code is unreachable since execution
+ // transfers to the callee. However, sometimes the backend might need to fall back to
+ // a regular call, so we include return handling and let the backend delete it
+ // when redundant.
+ // For details, see internal/engine/RATIONALE.md
+ first, rest := call.Returns()
+ if first.Valid() {
+ state.push(first)
+ }
+ for _, v := range rest {
+ state.push(v)
+ }
+
+ c.reloadAfterCall()
+ c.lowerReturn(builder)
+}
+
+func (c *Compiler) lowerTailCallReturnCallIndirect(typeIndex, tableIndex uint32) {
+ builder := c.ssaBuilder
+ state := c.state()
+ executablePtr, typ, args := c.prepareCallIndirect(typeIndex, tableIndex)
+
+ call := builder.AllocateInstruction()
+ call.AsTailCallReturnCallIndirect(executablePtr, c.signatures[typ], args)
+ builder.InsertInstruction(call)
+
+ // In a proper tail call, the following code is unreachable since execution
+ // transfers to the callee. However, sometimes the backend might need to fall back to
+ // a regular call, so we include return handling and let the backend delete it
+ // when redundant.
+ // For details, see internal/engine/RATIONALE.md
+ first, rest := call.Returns()
+ if first.Valid() {
+ state.push(first)
+ }
+ for _, v := range rest {
+ state.push(v)
+ }
+
+ c.reloadAfterCall()
+ c.lowerReturn(builder)
+}
+
// memOpSetup inserts the bounds check and calculates the address of the memory operation (loads/stores).
func (c *Compiler) memOpSetup(baseAddr ssa.Value, constOffset, operationSizeInBytes uint64) (address ssa.Value) {
address = ssa.ValueInvalid
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go
index 8811feed7..53206f1cc 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go
@@ -174,20 +174,21 @@ func (m *moduleEngine) NewFunction(index wasm.Index) api.Function {
indexInModule: index,
executable: &p.executable[offset],
parent: m,
- preambleExecutable: &m.parent.entryPreambles[typIndex][0],
+ preambleExecutable: p.entryPreamblesPtrs[typIndex],
sizeOfParamResultSlice: sizeOfParamResultSlice,
requiredParams: typ.ParamNumInUint64,
numberOfResults: typ.ResultNumInUint64,
}
- ce.execCtx.memoryGrowTrampolineAddress = &m.parent.sharedFunctions.memoryGrowExecutable[0]
- ce.execCtx.stackGrowCallTrampolineAddress = &m.parent.sharedFunctions.stackGrowExecutable[0]
- ce.execCtx.checkModuleExitCodeTrampolineAddress = &m.parent.sharedFunctions.checkModuleExitCode[0]
- ce.execCtx.tableGrowTrampolineAddress = &m.parent.sharedFunctions.tableGrowExecutable[0]
- ce.execCtx.refFuncTrampolineAddress = &m.parent.sharedFunctions.refFuncExecutable[0]
- ce.execCtx.memoryWait32TrampolineAddress = &m.parent.sharedFunctions.memoryWait32Executable[0]
- ce.execCtx.memoryWait64TrampolineAddress = &m.parent.sharedFunctions.memoryWait64Executable[0]
- ce.execCtx.memoryNotifyTrampolineAddress = &m.parent.sharedFunctions.memoryNotifyExecutable[0]
+ sharedFunctions := p.sharedFunctions
+ ce.execCtx.memoryGrowTrampolineAddress = sharedFunctions.memoryGrowAddress
+ ce.execCtx.stackGrowCallTrampolineAddress = sharedFunctions.stackGrowAddress
+ ce.execCtx.checkModuleExitCodeTrampolineAddress = sharedFunctions.checkModuleExitCodeAddress
+ ce.execCtx.tableGrowTrampolineAddress = sharedFunctions.tableGrowAddress
+ ce.execCtx.refFuncTrampolineAddress = sharedFunctions.refFuncAddress
+ ce.execCtx.memoryWait32TrampolineAddress = sharedFunctions.memoryWait32Address
+ ce.execCtx.memoryWait64TrampolineAddress = sharedFunctions.memoryWait64Address
+ ce.execCtx.memoryNotifyTrampolineAddress = sharedFunctions.memoryNotifyAddress
ce.execCtx.memmoveAddress = memmovPtr
ce.init()
return ce
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go
index 9a3d1da6e..7b37a8afe 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go
@@ -633,6 +633,14 @@ const (
// OpcodeFence is a memory fence operation.
OpcodeFence
+ // OpcodeTailCallReturnCall is the equivalent of OpcodeCall (a "near" call)
+ // for tail calls. Semantically, it combines Call + Return into a single operation.
+ OpcodeTailCallReturnCall
+
+ // OpcodeTailCallReturnCallIndirect is the equivalent of OpcodeCallIndirect (a call to a function address)
+ // for tail calls. Semantically, it combines CallIndirect + Return into a single operation.
+ OpcodeTailCallReturnCallIndirect
+
// opcodeEnd marks the end of the opcode list.
opcodeEnd
)
@@ -679,12 +687,44 @@ func (op AtomicRmwOp) String() string {
type returnTypesFn func(b *builder, instr *Instruction) (t1 Type, ts []Type)
var (
- returnTypesFnNoReturns returnTypesFn = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return typeInvalid, nil }
- returnTypesFnSingle = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return instr.typ, nil }
- returnTypesFnI32 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeI32, nil }
- returnTypesFnF32 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeF32, nil }
- returnTypesFnF64 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeF64, nil }
- returnTypesFnV128 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeV128, nil }
+ returnTypesFnNoReturns returnTypesFn = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return typeInvalid, nil }
+ returnTypesFnSingle = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return instr.typ, nil }
+ returnTypesFnI32 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeI32, nil }
+ returnTypesFnF32 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeF32, nil }
+ returnTypesFnF64 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeF64, nil }
+ returnTypesFnV128 = func(b *builder, instr *Instruction) (t1 Type, ts []Type) { return TypeV128, nil }
+ returnTypesFnCallIndirect = func(b *builder, instr *Instruction) (t1 Type, ts []Type) {
+ sigID := SignatureID(instr.u1)
+ sig, ok := b.signatures[sigID]
+ if !ok {
+ panic("BUG")
+ }
+ switch len(sig.Results) {
+ case 0:
+ t1 = typeInvalid
+ case 1:
+ t1 = sig.Results[0]
+ default:
+ t1, ts = sig.Results[0], sig.Results[1:]
+ }
+ return
+ }
+ returnTypesFnCall = func(b *builder, instr *Instruction) (t1 Type, ts []Type) {
+ sigID := SignatureID(instr.u2)
+ sig, ok := b.signatures[sigID]
+ if !ok {
+ panic("BUG")
+ }
+ switch len(sig.Results) {
+ case 0:
+ t1 = typeInvalid
+ case 1:
+ t1 = sig.Results[0]
+ default:
+ t1, ts = sig.Results[0], sig.Results[1:]
+ }
+ return
+ }
)
// sideEffect provides the info to determine if an instruction has side effects which
@@ -846,6 +886,8 @@ var instructionSideEffects = [opcodeEnd]sideEffect{
OpcodeAtomicStore: sideEffectStrict,
OpcodeAtomicCas: sideEffectStrict,
OpcodeFence: sideEffectStrict,
+ OpcodeTailCallReturnCall: sideEffectStrict,
+ OpcodeTailCallReturnCallIndirect: sideEffectStrict,
OpcodeWideningPairwiseDotProductS: sideEffectNone,
}
@@ -860,105 +902,75 @@ func (i *Instruction) sideEffect() sideEffect {
// instructionReturnTypes provides the function to determine the return types of an instruction.
var instructionReturnTypes = [opcodeEnd]returnTypesFn{
- OpcodeExtIaddPairwise: returnTypesFnV128,
- OpcodeVbor: returnTypesFnV128,
- OpcodeVbxor: returnTypesFnV128,
- OpcodeVband: returnTypesFnV128,
- OpcodeVbnot: returnTypesFnV128,
- OpcodeVbandnot: returnTypesFnV128,
- OpcodeVbitselect: returnTypesFnV128,
- OpcodeVanyTrue: returnTypesFnI32,
- OpcodeVallTrue: returnTypesFnI32,
- OpcodeVhighBits: returnTypesFnI32,
- OpcodeVIadd: returnTypesFnV128,
- OpcodeVSaddSat: returnTypesFnV128,
- OpcodeVUaddSat: returnTypesFnV128,
- OpcodeVIsub: returnTypesFnV128,
- OpcodeVSsubSat: returnTypesFnV128,
- OpcodeVUsubSat: returnTypesFnV128,
- OpcodeVIcmp: returnTypesFnV128,
- OpcodeVImin: returnTypesFnV128,
- OpcodeVUmin: returnTypesFnV128,
- OpcodeVImax: returnTypesFnV128,
- OpcodeVUmax: returnTypesFnV128,
- OpcodeVImul: returnTypesFnV128,
- OpcodeVAvgRound: returnTypesFnV128,
- OpcodeVIabs: returnTypesFnV128,
- OpcodeVIneg: returnTypesFnV128,
- OpcodeVIpopcnt: returnTypesFnV128,
- OpcodeVIshl: returnTypesFnV128,
- OpcodeVSshr: returnTypesFnV128,
- OpcodeVUshr: returnTypesFnV128,
- OpcodeExtractlane: returnTypesFnSingle,
- OpcodeInsertlane: returnTypesFnV128,
- OpcodeBand: returnTypesFnSingle,
- OpcodeFcopysign: returnTypesFnSingle,
- OpcodeBitcast: returnTypesFnSingle,
- OpcodeBor: returnTypesFnSingle,
- OpcodeBxor: returnTypesFnSingle,
- OpcodeRotl: returnTypesFnSingle,
- OpcodeRotr: returnTypesFnSingle,
- OpcodeIshl: returnTypesFnSingle,
- OpcodeSshr: returnTypesFnSingle,
- OpcodeSdiv: returnTypesFnSingle,
- OpcodeSrem: returnTypesFnSingle,
- OpcodeUdiv: returnTypesFnSingle,
- OpcodeUrem: returnTypesFnSingle,
- OpcodeUshr: returnTypesFnSingle,
- OpcodeJump: returnTypesFnNoReturns,
- OpcodeUndefined: returnTypesFnNoReturns,
- OpcodeIconst: returnTypesFnSingle,
- OpcodeSelect: returnTypesFnSingle,
- OpcodeSExtend: returnTypesFnSingle,
- OpcodeUExtend: returnTypesFnSingle,
- OpcodeSwidenLow: returnTypesFnV128,
- OpcodeUwidenLow: returnTypesFnV128,
- OpcodeSwidenHigh: returnTypesFnV128,
- OpcodeUwidenHigh: returnTypesFnV128,
- OpcodeSnarrow: returnTypesFnV128,
- OpcodeUnarrow: returnTypesFnV128,
- OpcodeSwizzle: returnTypesFnSingle,
- OpcodeShuffle: returnTypesFnV128,
- OpcodeSplat: returnTypesFnV128,
- OpcodeIreduce: returnTypesFnSingle,
- OpcodeFabs: returnTypesFnSingle,
- OpcodeSqrt: returnTypesFnSingle,
- OpcodeCeil: returnTypesFnSingle,
- OpcodeFloor: returnTypesFnSingle,
- OpcodeTrunc: returnTypesFnSingle,
- OpcodeNearest: returnTypesFnSingle,
- OpcodeCallIndirect: func(b *builder, instr *Instruction) (t1 Type, ts []Type) {
- sigID := SignatureID(instr.u1)
- sig, ok := b.signatures[sigID]
- if !ok {
- panic("BUG")
- }
- switch len(sig.Results) {
- case 0:
- t1 = typeInvalid
- case 1:
- t1 = sig.Results[0]
- default:
- t1, ts = sig.Results[0], sig.Results[1:]
- }
- return
- },
- OpcodeCall: func(b *builder, instr *Instruction) (t1 Type, ts []Type) {
- sigID := SignatureID(instr.u2)
- sig, ok := b.signatures[sigID]
- if !ok {
- panic("BUG")
- }
- switch len(sig.Results) {
- case 0:
- t1 = typeInvalid
- case 1:
- t1 = sig.Results[0]
- default:
- t1, ts = sig.Results[0], sig.Results[1:]
- }
- return
- },
+ OpcodeExtIaddPairwise: returnTypesFnV128,
+ OpcodeVbor: returnTypesFnV128,
+ OpcodeVbxor: returnTypesFnV128,
+ OpcodeVband: returnTypesFnV128,
+ OpcodeVbnot: returnTypesFnV128,
+ OpcodeVbandnot: returnTypesFnV128,
+ OpcodeVbitselect: returnTypesFnV128,
+ OpcodeVanyTrue: returnTypesFnI32,
+ OpcodeVallTrue: returnTypesFnI32,
+ OpcodeVhighBits: returnTypesFnI32,
+ OpcodeVIadd: returnTypesFnV128,
+ OpcodeVSaddSat: returnTypesFnV128,
+ OpcodeVUaddSat: returnTypesFnV128,
+ OpcodeVIsub: returnTypesFnV128,
+ OpcodeVSsubSat: returnTypesFnV128,
+ OpcodeVUsubSat: returnTypesFnV128,
+ OpcodeVIcmp: returnTypesFnV128,
+ OpcodeVImin: returnTypesFnV128,
+ OpcodeVUmin: returnTypesFnV128,
+ OpcodeVImax: returnTypesFnV128,
+ OpcodeVUmax: returnTypesFnV128,
+ OpcodeVImul: returnTypesFnV128,
+ OpcodeVAvgRound: returnTypesFnV128,
+ OpcodeVIabs: returnTypesFnV128,
+ OpcodeVIneg: returnTypesFnV128,
+ OpcodeVIpopcnt: returnTypesFnV128,
+ OpcodeVIshl: returnTypesFnV128,
+ OpcodeVSshr: returnTypesFnV128,
+ OpcodeVUshr: returnTypesFnV128,
+ OpcodeExtractlane: returnTypesFnSingle,
+ OpcodeInsertlane: returnTypesFnV128,
+ OpcodeBand: returnTypesFnSingle,
+ OpcodeFcopysign: returnTypesFnSingle,
+ OpcodeBitcast: returnTypesFnSingle,
+ OpcodeBor: returnTypesFnSingle,
+ OpcodeBxor: returnTypesFnSingle,
+ OpcodeRotl: returnTypesFnSingle,
+ OpcodeRotr: returnTypesFnSingle,
+ OpcodeIshl: returnTypesFnSingle,
+ OpcodeSshr: returnTypesFnSingle,
+ OpcodeSdiv: returnTypesFnSingle,
+ OpcodeSrem: returnTypesFnSingle,
+ OpcodeUdiv: returnTypesFnSingle,
+ OpcodeUrem: returnTypesFnSingle,
+ OpcodeUshr: returnTypesFnSingle,
+ OpcodeJump: returnTypesFnNoReturns,
+ OpcodeUndefined: returnTypesFnNoReturns,
+ OpcodeIconst: returnTypesFnSingle,
+ OpcodeSelect: returnTypesFnSingle,
+ OpcodeSExtend: returnTypesFnSingle,
+ OpcodeUExtend: returnTypesFnSingle,
+ OpcodeSwidenLow: returnTypesFnV128,
+ OpcodeUwidenLow: returnTypesFnV128,
+ OpcodeSwidenHigh: returnTypesFnV128,
+ OpcodeUwidenHigh: returnTypesFnV128,
+ OpcodeSnarrow: returnTypesFnV128,
+ OpcodeUnarrow: returnTypesFnV128,
+ OpcodeSwizzle: returnTypesFnSingle,
+ OpcodeShuffle: returnTypesFnV128,
+ OpcodeSplat: returnTypesFnV128,
+ OpcodeIreduce: returnTypesFnSingle,
+ OpcodeFabs: returnTypesFnSingle,
+ OpcodeSqrt: returnTypesFnSingle,
+ OpcodeCeil: returnTypesFnSingle,
+ OpcodeFloor: returnTypesFnSingle,
+ OpcodeTrunc: returnTypesFnSingle,
+ OpcodeNearest: returnTypesFnSingle,
+ OpcodeCallIndirect: returnTypesFnCallIndirect,
+ OpcodeCall: returnTypesFnCall,
OpcodeLoad: returnTypesFnSingle,
OpcodeVZeroExtLoad: returnTypesFnV128,
OpcodeLoadSplat: returnTypesFnV128,
@@ -1032,6 +1044,8 @@ var instructionReturnTypes = [opcodeEnd]returnTypesFn{
OpcodeAtomicStore: returnTypesFnNoReturns,
OpcodeAtomicCas: returnTypesFnSingle,
OpcodeFence: returnTypesFnNoReturns,
+ OpcodeTailCallReturnCallIndirect: returnTypesFnCallIndirect,
+ OpcodeTailCallReturnCall: returnTypesFnCall,
OpcodeWideningPairwiseDotProductS: returnTypesFnV128,
}
@@ -2038,6 +2052,25 @@ func (i *Instruction) AtomicTargetSize() (size uint64) {
return i.u1
}
+// AsTailCallReturnCall initializes this instruction as a call instruction with OpcodeTailCallReturnCall.
+func (i *Instruction) AsTailCallReturnCall(ref FuncRef, sig *Signature, args Values) {
+ i.opcode = OpcodeTailCallReturnCall
+ i.u1 = uint64(ref)
+ i.vs = args
+ i.u2 = uint64(sig.ID)
+ sig.used = true
+}
+
+// AsTailCallReturnCallIndirect initializes this instruction as a call-indirect instruction with OpcodeTailCallReturnCallIndirect.
+func (i *Instruction) AsTailCallReturnCallIndirect(funcPtr Value, sig *Signature, args Values) *Instruction {
+ i.opcode = OpcodeTailCallReturnCallIndirect
+ i.vs = args
+ i.v = funcPtr
+ i.u1 = uint64(sig.ID)
+ sig.used = true
+ return i
+}
+
// ReturnVals returns the return values of OpcodeReturn.
func (i *Instruction) ReturnVals() []Value {
return i.vs.View()
@@ -2166,7 +2199,7 @@ func (i *Instruction) AsCall(ref FuncRef, sig *Signature, args Values) {
// CallData returns the call data for this instruction necessary for backends.
func (i *Instruction) CallData() (ref FuncRef, sigID SignatureID, args []Value) {
- if i.opcode != OpcodeCall {
+ if i.opcode != OpcodeCall && i.opcode != OpcodeTailCallReturnCall {
panic("BUG: CallData only available for OpcodeCall")
}
ref = FuncRef(i.u1)
@@ -2195,8 +2228,8 @@ func (i *Instruction) AsCallGoRuntimeMemmove(funcPtr Value, sig *Signature, args
// CallIndirectData returns the call indirect data for this instruction necessary for backends.
func (i *Instruction) CallIndirectData() (funcPtr Value, sigID SignatureID, args []Value, isGoMemmove bool) {
- if i.opcode != OpcodeCallIndirect {
- panic("BUG: CallIndirectData only available for OpcodeCallIndirect")
+ if i.opcode != OpcodeCallIndirect && i.opcode != OpcodeTailCallReturnCallIndirect {
+ panic("BUG: CallIndirectData only available for OpcodeCallIndirect and OpcodeTailCallReturnCallIndirect")
}
funcPtr = i.v
sigID = SignatureID(i.u1)
@@ -2620,6 +2653,17 @@ func (i *Instruction) Format(b Builder) string {
instSuffix = fmt.Sprintf("_%d, %s, %s, %s", 8*i.u1, i.v.Format(b), i.v2.Format(b), i.v3.Format(b))
case OpcodeFence:
instSuffix = fmt.Sprintf(" %d", i.u1)
+ case OpcodeTailCallReturnCall, OpcodeTailCallReturnCallIndirect:
+ view := i.vs.View()
+ vs := make([]string, len(view))
+ for idx := range vs {
+ vs[idx] = view[idx].Format(b)
+ }
+ if i.opcode == OpcodeTailCallReturnCallIndirect {
+ instSuffix = fmt.Sprintf(" %s:%s, %s", i.v.Format(b), SignatureID(i.u1), strings.Join(vs, ", "))
+ } else {
+ instSuffix = fmt.Sprintf(" %s:%s, %s", FuncRef(i.u1), SignatureID(i.u2), strings.Join(vs, ", "))
+ }
case OpcodeWideningPairwiseDotProductS:
instSuffix = fmt.Sprintf(" %s, %s", i.v.Format(b), i.v2.Format(b))
default:
@@ -2879,6 +2923,10 @@ func (o Opcode) String() (ret string) {
return "AtomicStore"
case OpcodeFence:
return "Fence"
+ case OpcodeTailCallReturnCall:
+ return "ReturnCall"
+ case OpcodeTailCallReturnCallIndirect:
+ return "ReturnCallIndirect"
case OpcodeVbor:
return "Vbor"
case OpcodeVbxor:
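
AsTailCallReturnCall and AsTailCallReturnCallIndirect populate the same fields as AsCall and AsCallIndirect, and CallData/CallIndirectData now accept the tail-call opcodes as well, so backends can reuse the existing call plumbing. A minimal sketch of that flow, written as if inside the ssa package; the helper name lowerReturnCall and the Builder methods used are assumptions, not part of this diff:

// lowerReturnCall is a hypothetical helper illustrating the new opcode; only
// AsTailCallReturnCall and CallData are taken from the change above.
func lowerReturnCall(b Builder, ref FuncRef, sig *Signature, args Values) {
	tail := b.AllocateInstruction() // assumed Builder method
	tail.AsTailCallReturnCall(ref, sig, args)
	b.InsertInstruction(tail)

	// CallData accepts OpcodeTailCallReturnCall as well as OpcodeCall, so the
	// backend reads the target and arguments back exactly like a plain call.
	fnRef, sigID, callArgs := tail.CallData()
	_, _, _ = fnRef, sigID, callArgs
}
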
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/debug_options.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/debug_options.go
index 2db61e219..783ab122a 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/debug_options.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/debug_options.go
@@ -6,6 +6,7 @@ import (
"fmt"
"math/rand"
"os"
+ "sync"
"time"
)
@@ -91,7 +92,7 @@ type (
initialCompilationDone bool
maybeRandomizedIndexes []int
r *rand.Rand
- values map[string]string
+ values sync.Map
}
verifierStateContextKey struct{}
currentFunctionNameKey struct{}
@@ -106,31 +107,24 @@ func NewDeterministicCompilationVerifierContext(ctx context.Context, localFuncti
}
r := rand.New(rand.NewSource(time.Now().UnixNano()))
return context.WithValue(ctx, verifierStateContextKey{}, &verifierState{
- r: r, maybeRandomizedIndexes: maybeRandomizedIndexes, values: map[string]string{},
+ r: r, maybeRandomizedIndexes: maybeRandomizedIndexes, values: sync.Map{},
})
}
// DeterministicCompilationVerifierRandomizeIndexes randomizes the indexes for the deterministic compilation verifier.
-// To get the randomized index, use DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex.
-func DeterministicCompilationVerifierRandomizeIndexes(ctx context.Context) {
+// It returns a slice that maps each index to its randomized counterpart.
+func DeterministicCompilationVerifierRandomizeIndexes(ctx context.Context) []int {
state := ctx.Value(verifierStateContextKey{}).(*verifierState)
if !state.initialCompilationDone {
// If this is the first attempt, we use the index as-is order.
state.initialCompilationDone = true
- return
+ return state.maybeRandomizedIndexes
}
r := state.r
r.Shuffle(len(state.maybeRandomizedIndexes), func(i, j int) {
state.maybeRandomizedIndexes[i], state.maybeRandomizedIndexes[j] = state.maybeRandomizedIndexes[j], state.maybeRandomizedIndexes[i]
})
-}
-
-// DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex returns the randomized index for the given `index`
-// which is assigned by DeterministicCompilationVerifierRandomizeIndexes.
-func DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex(ctx context.Context, index int) int {
- state := ctx.Value(verifierStateContextKey{}).(*verifierState)
- ret := state.maybeRandomizedIndexes[index]
- return ret
+ return state.maybeRandomizedIndexes
}
// VerifyOrSetDeterministicCompilationContextValue verifies that the `newValue` is the same as the previous value for the given `scope`
@@ -141,9 +135,8 @@ func VerifyOrSetDeterministicCompilationContextValue(ctx context.Context, scope
fn := ctx.Value(currentFunctionNameKey{}).(string)
key := fn + ": " + scope
verifierCtx := ctx.Value(verifierStateContextKey{}).(*verifierState)
- oldValue, ok := verifierCtx.values[key]
- if !ok {
- verifierCtx.values[key] = newValue
+ oldValue, loaded := verifierCtx.values.LoadOrStore(key, newValue)
+ if !loaded {
return
}
if oldValue != newValue {
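
Replacing the plain values map with sync.Map lets VerifyOrSetDeterministicCompilationContextValue be called from concurrently compiling goroutines: LoadOrStore atomically installs the first value for a key and otherwise hands back the previously stored one for comparison. A standalone sketch of that pattern; the names values and verifyOrSet are illustrative, not the wazero API:

package main

import (
	"fmt"
	"sync"
)

var values sync.Map // mirrors verifierState.values

func verifyOrSet(key, newValue string) {
	// LoadOrStore stores newValue only when key is absent; otherwise it
	// returns the previously stored value with loaded == true.
	oldValue, loaded := values.LoadOrStore(key, newValue)
	if !loaded {
		return
	}
	if oldValue != newValue {
		panic(fmt.Sprintf("%s: %v != %s", key, oldValue, newValue))
	}
}

func main() {
	verifyOrSet("fn: scope", "a")
	verifyOrSet("fn: scope", "a") // same value: no panic.
}
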
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go
index 313e34f9a..d67a3262d 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go
@@ -69,7 +69,7 @@ type IDedPool[T any] struct {
// NewIDedPool returns a new IDedPool.
func NewIDedPool[T any](resetFn func(*T)) IDedPool[T] {
- return IDedPool[T]{pool: NewPool[T](resetFn), maxIDEncountered: -1}
+ return IDedPool[T]{pool: NewPool(resetFn), maxIDEncountered: -1}
}
// GetOrAllocate returns the T with the given id.
@@ -134,10 +134,10 @@ type VarLength[T any] struct {
// NewVarLengthPool returns a new VarLengthPool.
func NewVarLengthPool[T any]() VarLengthPool[T] {
return VarLengthPool[T]{
- arrayPool: NewPool[varLengthPoolArray[T]](func(v *varLengthPoolArray[T]) {
+ arrayPool: NewPool(func(v *varLengthPoolArray[T]) {
v.next = 0
}),
- slicePool: NewPool[[]T](func(i *[]T) {
+ slicePool: NewPool(func(i *[]T) {
*i = (*i)[:0]
}),
}
@@ -155,6 +155,9 @@ func (p *VarLengthPool[T]) Allocate(knownMin int) VarLength[T] {
return VarLength[T]{arr: arr}
}
slc := p.slicePool.Allocate()
+ if cap(*slc) < knownMin {
+ *slc = make([]T, 0, knownMin)
+ }
return VarLength[T]{slc: slc}
}
@@ -166,39 +169,36 @@ func (p *VarLengthPool[T]) Reset() {
// Append appends items to the backing slice just like the `append` builtin function in Go.
func (i VarLength[T]) Append(p *VarLengthPool[T], items ...T) VarLength[T] {
- if i.slc != nil {
- *i.slc = append(*i.slc, items...)
+ slc := i.slc
+ if slc != nil {
+ *slc = append(*slc, items...)
return i
}
- if i.arr == nil {
- i.arr = p.arrayPool.Allocate()
+ arr := i.arr
+ if arr == nil {
+ arr = p.arrayPool.Allocate()
+ i.arr = arr
}
- arr := i.arr
if arr.next+len(items) <= arraySize {
- for _, item := range items {
- arr.arr[arr.next] = item
- arr.next++
- }
+ arr.next += copy(arr.arr[arr.next:], items)
} else {
- slc := p.slicePool.Allocate()
+ slc = p.slicePool.Allocate()
// Copy the array to the slice.
- for ptr := 0; ptr < arr.next; ptr++ {
- *slc = append(*slc, arr.arr[ptr])
- }
+ *slc = append(*slc, arr.arr[:arr.next]...)
+ *slc = append(*slc, items...)
i.slc = slc
- *i.slc = append(*i.slc, items...)
}
return i
}
// View returns the backing slice.
func (i VarLength[T]) View() []T {
- if i.slc != nil {
+ if slc := i.slc; slc != nil {
return *i.slc
- } else if i.arr != nil {
- arr := i.arr
+ }
+ if arr := i.arr; arr != nil {
return arr.arr[:arr.next]
}
return nil
@@ -207,9 +207,9 @@ func (i VarLength[T]) View() []T {
// Cut cuts the backing slice to the given length.
// Precondition: n <= len(i.backing).
func (i VarLength[T]) Cut(n int) {
- if i.slc != nil {
- *i.slc = (*i.slc)[:n]
- } else if i.arr != nil {
- i.arr.next = n
+ if slc := i.slc; slc != nil {
+ *slc = (*slc)[:n]
+ } else if arr := i.arr; arr != nil {
+ arr.next = n
}
}
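
Taken together, the VarLength changes keep the same API while cutting per-item copies: Allocate pre-grows the pooled slice to the knownMin hint when it takes the slice path, and Append bulk-copies via copy/append instead of element-by-element loops. A short usage sketch, written as if inside the wazevoapi package; the pool variable and collect function are illustrative only:

var pool = NewVarLengthPool[int]()

// collect copies xs into pooled storage; it assumes only NewVarLengthPool,
// Allocate, Append and View as defined above.
func collect(xs []int) []int {
	// Allocate takes the expected element count; on the pooled-slice path the
	// backing slice is now grown to at least len(xs) up front.
	vl := pool.Allocate(len(xs))
	// Append returns the (possibly re-backed) VarLength, so reassign it.
	vl = vl.Append(&pool, xs...)
	// The view stays valid only until pool.Reset() recycles the storage.
	return vl.View()
}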