| field | value |
|---|---|
| author | 2024-08-15 00:08:55 +0000 |
| committer | 2024-08-15 00:08:55 +0000 |
| commit | 09f24e044653b1327ac1c40f3ab150e3f0184f23 (patch) |
| tree | 1d9984d053fa5c8d1203abaa49b8752a1532ff11 /vendor/github.com/tetratelabs/wazero/internal/engine |
| parent | update go-fastcopy to v1.1.3 (#3200) (diff) |
| download | gotosocial-09f24e044653b1327ac1c40f3ab150e3f0184f23.tar.xz |
update go-ffmpreg to v0.2.5 (pulls in latest tetratelabs/wazero) (#3203)
Diffstat (limited to 'vendor/github.com/tetratelabs/wazero/internal/engine')
49 files changed, 1636 insertions, 1742 deletions
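The interpreter compiler hunks below replace the O(n) `stackLenInUint64(ceil)` rescan with a counter that is maintained incrementally: each push/pop adjusts the length-in-uint64 by 2 for v128 values and 1 otherwise, and control frames snapshot that counter so `stackSwitchAt` can restore it on `else`/`end`. A minimal sketch of the bookkeeping idea, assuming simplified types (the vendored code derives the extra slot branch-free from its `unsignedType` encoding via `1 + int(unsignedTypeV128&ts>>2)`; this sketch uses an explicit branch for clarity):

```go
package main

import "fmt"

// valueType is an illustrative stand-in for the interpreter's unsignedType.
type valueType int

const (
	typeI32 valueType = iota
	typeI64
	typeF32
	typeF64
	typeV128 // occupies two uint64 slots on the value stack
)

// width reports how many uint64 slots a value of type t occupies.
func width(t valueType) int {
	if t == typeV128 {
		return 2
	}
	return 1
}

// stackTracker mirrors the idea behind compiler.stackLenInUint64:
// maintain the uint64 length incrementally instead of rescanning the stack.
type stackTracker struct {
	stack       []valueType
	lenInUint64 int
}

func (s *stackTracker) push(t valueType) {
	s.stack = append(s.stack, t)
	s.lenInUint64 += width(t)
}

func (s *stackTracker) pop() valueType {
	t := s.stack[len(s.stack)-1]
	s.stack = s.stack[:len(s.stack)-1]
	s.lenInUint64 -= width(t)
	return t
}

func main() {
	var s stackTracker
	s.push(typeI32)
	s.push(typeV128)
	fmt.Println(len(s.stack), s.lenInUint64) // 2 values, 3 uint64 slots
}
```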
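The interpreter diff also extracts the SIMD dot product into a non-inlined `v128Dot` helper (the comment in the hunk notes a suspected Go 1.22 arm64 `-race` regalloc issue when it is inlined). For reference, each 64-bit half holds four signed 16-bit lanes, and adjacent lane products are summed pairwise into 32-bit results. A standalone copy of that helper with an illustrative worked example (the inputs and `main` are mine, not from the source):

```go
package main

import "fmt"

// v128Dot is a standalone copy of the helper added in the interpreter diff below.
// It multiplies corresponding signed 16-bit lanes of the two vectors and adds
// adjacent products pairwise into four 32-bit results (i32x4.dot_i16x8_s).
func v128Dot(x1Hi, x1Lo, x2Hi, x2Lo uint64) (uint64, uint64) {
	r1 := int32(int16(x1Lo>>0)) * int32(int16(x2Lo>>0))
	r2 := int32(int16(x1Lo>>16)) * int32(int16(x2Lo>>16))
	r3 := int32(int16(x1Lo>>32)) * int32(int16(x2Lo>>32))
	r4 := int32(int16(x1Lo>>48)) * int32(int16(x2Lo>>48))
	r5 := int32(int16(x1Hi>>0)) * int32(int16(x2Hi>>0))
	r6 := int32(int16(x1Hi>>16)) * int32(int16(x2Hi>>16))
	r7 := int32(int16(x1Hi>>32)) * int32(int16(x2Hi>>32))
	r8 := int32(int16(x1Hi>>48)) * int32(int16(x2Hi>>48))
	return uint64(uint32(r1+r2)) | (uint64(uint32(r3+r4)) << 32),
		uint64(uint32(r5+r6)) | (uint64(uint32(r7+r8)) << 32)
}

func main() {
	// Low half holds lanes [1, 2, 3, 4]; the high half holds [0, 0, 0, 0].
	x1Lo := uint64(1) | 2<<16 | 3<<32 | 4<<48
	// Other operand: lanes [5, 6, 7, 8].
	x2Lo := uint64(5) | 6<<16 | 7<<32 | 8<<48
	lo, hi := v128Dot(0, x1Lo, 0, x2Lo)
	// 1*5+2*6 = 17 and 3*7+4*8 = 53 land in the two 32-bit halves of lo.
	fmt.Printf("lo=%#x hi=%#x\n", lo, hi) // lo=0x3500000011 hi=0x0
}
```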
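Further down, the amd64 backend hunks replace the per-call allocation of jump-table target slices (the removed "TODO: reuse the slice!") with a reused backing slice plus a cursor, `jmpTableTargetsNext`, that is rewound on `Reset`. A hedged, generic sketch of that reuse-by-cursor pattern; the type and method names here are illustrative, not from the source:

```go
package main

import "fmt"

// targetPool reuses its backing slices across compilations: next is rewound
// to zero on reset instead of discarding the allocated [][]uint32 storage.
type targetPool struct {
	targets [][]uint32
	next    int
}

// add stores a fresh target list and returns its index, reusing storage when possible.
func (p *targetPool) add(labels []uint32) int {
	if p.next == len(p.targets) {
		p.targets = append(p.targets, make([]uint32, 0, len(labels)))
	}
	idx := p.next
	p.next++
	p.targets[idx] = append(p.targets[idx][:0], labels...)
	return idx
}

// reset makes all previously allocated slots available for reuse.
func (p *targetPool) reset() { p.next = 0 }

func main() {
	var p targetPool
	i := p.add([]uint32{1, 2, 3})
	fmt.Println(i, p.targets[i]) // 0 [1 2 3]
	p.reset()
	j := p.add([]uint32{4, 5}) // reuses slot 0's backing array
	fmt.Println(j, p.targets[j]) // 0 [4 5]
}
```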
| diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go index 56dfac620..4e20e4b2c 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/compiler.go @@ -26,11 +26,14 @@ const (  type (  	controlFrame struct {  		frameID uint32 -		// originalStackLen holds the number of values on the stack +		// originalStackLenWithoutParam holds the number of values on the stack  		// when Start executing this control frame minus params for the block.  		originalStackLenWithoutParam int -		blockType                    *wasm.FunctionType -		kind                         controlFrameKind +		// originalStackLenWithoutParamUint64 is almost the same as originalStackLenWithoutParam +		// except that it holds the number of values on the stack in uint64. +		originalStackLenWithoutParamUint64 int +		blockType                          *wasm.FunctionType +		kind                               controlFrameKind  	}  	controlFrames struct{ frames []controlFrame }  ) @@ -157,9 +160,11 @@ type compiler struct {  	enabledFeatures            api.CoreFeatures  	callFrameStackSizeInUint64 int  	stack                      []unsignedType -	currentFrameID             uint32 -	controlFrames              controlFrames -	unreachableState           struct { +	// stackLenInUint64 is the length of the stack in uint64. +	stackLenInUint64 int +	currentFrameID   uint32 +	controlFrames    controlFrames +	unreachableState struct {  		on    bool  		depth int  	} @@ -341,6 +346,7 @@ func (c *compiler) Next() (*compilationResult, error) {  	c.pc = 0  	c.currentOpPC = 0  	c.currentFrameID = 0 +	c.stackLenInUint64 = 0  	c.unreachableState.on, c.unreachableState.depth = false, 0  	if err := c.compile(sig, code.Body, code.LocalTypes, code.BodyOffsetInCodeSection); err != nil { @@ -449,10 +455,11 @@ operatorSwitch:  		// Create a new frame -- entering this block.  		frame := controlFrame{ -			frameID:                      c.nextFrameID(), -			originalStackLenWithoutParam: len(c.stack) - len(bt.Params), -			kind:                         controlFrameKindBlockWithoutContinuationLabel, -			blockType:                    bt, +			frameID:                            c.nextFrameID(), +			originalStackLenWithoutParam:       len(c.stack) - len(bt.Params), +			originalStackLenWithoutParamUint64: c.stackLenInUint64 - bt.ParamNumInUint64, +			kind:                               controlFrameKindBlockWithoutContinuationLabel, +			blockType:                          bt,  		}  		c.controlFrames.push(frame) @@ -473,10 +480,11 @@ operatorSwitch:  		// Create a new frame -- entering loop.  		frame := controlFrame{ -			frameID:                      c.nextFrameID(), -			originalStackLenWithoutParam: len(c.stack) - len(bt.Params), -			kind:                         controlFrameKindLoop, -			blockType:                    bt, +			frameID:                            c.nextFrameID(), +			originalStackLenWithoutParam:       len(c.stack) - len(bt.Params), +			originalStackLenWithoutParamUint64: c.stackLenInUint64 - bt.ParamNumInUint64, +			kind:                               controlFrameKindLoop, +			blockType:                          bt,  		}  		c.controlFrames.push(frame) @@ -515,8 +523,9 @@ operatorSwitch:  		// Create a new frame -- entering if.  		
frame := controlFrame{ -			frameID:                      c.nextFrameID(), -			originalStackLenWithoutParam: len(c.stack) - len(bt.Params), +			frameID:                            c.nextFrameID(), +			originalStackLenWithoutParam:       len(c.stack) - len(bt.Params), +			originalStackLenWithoutParamUint64: c.stackLenInUint64 - bt.ParamNumInUint64,  			// Note this will be set to controlFrameKindIfWithElse  			// when else opcode found later.  			kind:      controlFrameKindIfWithoutElse, @@ -543,7 +552,7 @@ operatorSwitch:  			// If it is currently in unreachable, and the non-nested if,  			// reset the stack so we can correctly handle the else block.  			top := c.controlFrames.top() -			c.stack = c.stack[:top.originalStackLenWithoutParam] +			c.stackSwitchAt(top)  			top.kind = controlFrameKindIfWithElse  			// Re-push the parameters to the if block so that else block can use them. @@ -572,7 +581,7 @@ operatorSwitch:  		// Reset the stack manipulated by the then block, and re-push the block param types to the stack. -		c.stack = c.stack[:frame.originalStackLenWithoutParam] +		c.stackSwitchAt(frame)  		for _, t := range frame.blockType.Params {  			c.stackPush(wasmValueTypeTounsignedType(t))  		} @@ -601,7 +610,7 @@ operatorSwitch:  				return nil  			} -			c.stack = c.stack[:frame.originalStackLenWithoutParam] +			c.stackSwitchAt(frame)  			for _, t := range frame.blockType.Results {  				c.stackPush(wasmValueTypeTounsignedType(t))  			} @@ -628,7 +637,7 @@ operatorSwitch:  		// We need to reset the stack so that  		// the values pushed inside the block.  		dropOp := newOperationDrop(c.getFrameDropRange(frame, true)) -		c.stack = c.stack[:frame.originalStackLenWithoutParam] +		c.stackSwitchAt(frame)  		// Push the result types onto the stack.  		for _, t := range frame.blockType.Results { @@ -3505,6 +3514,11 @@ func (c *compiler) stackPeek() (ret unsignedType) {  	return  } +func (c *compiler) stackSwitchAt(frame *controlFrame) { +	c.stack = c.stack[:frame.originalStackLenWithoutParam] +	c.stackLenInUint64 = frame.originalStackLenWithoutParamUint64 +} +  func (c *compiler) stackPop() (ret unsignedType) {  	// No need to check stack bound  	// as we can assume that all the operations @@ -3512,11 +3526,13 @@ func (c *compiler) stackPop() (ret unsignedType) {  	// at module validation phase.  	ret = c.stack[len(c.stack)-1]  	c.stack = c.stack[:len(c.stack)-1] +	c.stackLenInUint64 -= 1 + int(unsignedTypeV128&ret>>2)  	return  }  func (c *compiler) stackPush(ts unsignedType) {  	c.stack = append(c.stack, ts) +	c.stackLenInUint64 += 1 + int(unsignedTypeV128&ts>>2)  }  // emit adds the operations into the result. @@ -3565,7 +3581,7 @@ func (c *compiler) emitDefaultValue(t wasm.ValueType) {  // of the n-th local.  
func (c *compiler) localDepth(index wasm.Index) int {  	height := c.localIndexToStackHeightInUint64[index] -	return c.stackLenInUint64(len(c.stack)) - 1 - int(height) +	return c.stackLenInUint64 - 1 - height  }  func (c *compiler) localType(index wasm.Index) (t wasm.ValueType) { @@ -3592,14 +3608,7 @@ func (c *compiler) getFrameDropRange(frame *controlFrame, isEnd bool) inclusiveR  	} else {  		start = frame.blockType.ResultNumInUint64  	} -	var end int -	if frame.kind == controlFrameKindFunction { -		// On the function return, we eliminate all the contents on the stack -		// including locals (existing below of frame.originalStackLen) -		end = c.stackLenInUint64(len(c.stack)) - 1 -	} else { -		end = c.stackLenInUint64(len(c.stack)) - 1 - c.stackLenInUint64(frame.originalStackLenWithoutParam) -	} +	end := c.stackLenInUint64 - 1 - frame.originalStackLenWithoutParamUint64  	if start <= end {  		return inclusiveRange{Start: int32(start), End: int32(end)}  	} else { @@ -3607,17 +3616,6 @@ func (c *compiler) getFrameDropRange(frame *controlFrame, isEnd bool) inclusiveR  	}  } -func (c *compiler) stackLenInUint64(ceil int) (ret int) { -	for i := 0; i < ceil; i++ { -		if c.stack[i] == unsignedTypeV128 { -			ret += 2 -		} else { -			ret++ -		} -	} -	return -} -  func (c *compiler) readMemoryArg(tag string) (memoryArg, error) {  	c.result.UsesMemory = true  	alignment, num, err := leb128.LoadUint32(c.body[c.pc+1:]) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go index 18c5f4252..ee0b453ca 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go @@ -3901,14 +3901,9 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance  		case operationKindV128Dot:  			x2Hi, x2Lo := ce.popValue(), ce.popValue()  			x1Hi, x1Lo := ce.popValue(), ce.popValue() -			ce.pushValue( -				uint64(uint32(int32(int16(x1Lo>>0))*int32(int16(x2Lo>>0))+int32(int16(x1Lo>>16))*int32(int16(x2Lo>>16)))) | -					(uint64(uint32(int32(int16(x1Lo>>32))*int32(int16(x2Lo>>32))+int32(int16(x1Lo>>48))*int32(int16(x2Lo>>48)))) << 32), -			) -			ce.pushValue( -				uint64(uint32(int32(int16(x1Hi>>0))*int32(int16(x2Hi>>0))+int32(int16(x1Hi>>16))*int32(int16(x2Hi>>16)))) | -					(uint64(uint32(int32(int16(x1Hi>>32))*int32(int16(x2Hi>>32))+int32(int16(x1Hi>>48))*int32(int16(x2Hi>>48)))) << 32), -			) +			lo, hi := v128Dot(x1Hi, x1Lo, x2Hi, x2Lo) +			ce.pushValue(lo) +			ce.pushValue(hi)  			frame.pc++  		case operationKindV128ITruncSatFromF:  			hi, lo := ce.popValue(), ce.popValue() @@ -4584,3 +4579,18 @@ func (ce *callEngine) callGoFuncWithStack(ctx context.Context, m *wasm.ModuleIns  		ce.stack = ce.stack[0 : len(ce.stack)-shrinkLen]  	}  } + +// v128Dot performs a dot product of two 64-bit vectors. +// Note: for some reason (which I suspect is due to a bug in Go compiler's regalloc), +// inlining this function causes a bug which happens **only when** we run with -race AND arm64 AND Go 1.22. 
+func v128Dot(x1Hi, x1Lo, x2Hi, x2Lo uint64) (uint64, uint64) { +	r1 := int32(int16(x1Lo>>0)) * int32(int16(x2Lo>>0)) +	r2 := int32(int16(x1Lo>>16)) * int32(int16(x2Lo>>16)) +	r3 := int32(int16(x1Lo>>32)) * int32(int16(x2Lo>>32)) +	r4 := int32(int16(x1Lo>>48)) * int32(int16(x2Lo>>48)) +	r5 := int32(int16(x1Hi>>0)) * int32(int16(x2Hi>>0)) +	r6 := int32(int16(x1Hi>>16)) * int32(int16(x2Hi>>16)) +	r7 := int32(int16(x1Hi>>32)) * int32(int16(x2Hi>>32)) +	r8 := int32(int16(x1Hi>>48)) * int32(int16(x2Hi>>48)) +	return uint64(uint32(r1+r2)) | (uint64(uint32(r3+r4)) << 32), uint64(uint32(r5+r6)) | (uint64(uint32(r7+r8)) << 32) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go index 59bbfe02d..62d365015 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go @@ -69,7 +69,7 @@ type Compiler interface {  	AllocateVReg(typ ssa.Type) regalloc.VReg  	// ValueDefinition returns the definition of the given value. -	ValueDefinition(ssa.Value) *SSAValueDefinition +	ValueDefinition(ssa.Value) SSAValueDefinition  	// VRegOf returns the virtual register of the given ssa.Value.  	VRegOf(value ssa.Value) regalloc.VReg @@ -79,13 +79,13 @@ type Compiler interface {  	// MatchInstr returns true if the given definition is from an instruction with the given opcode, the current group ID,  	// and a refcount of 1. That means, the instruction can be merged/swapped within the current instruction group. -	MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool +	MatchInstr(def SSAValueDefinition, opcode ssa.Opcode) bool  	// MatchInstrOneOf is the same as MatchInstr but for multiple opcodes. If it matches one of ssa.Opcode,  	// this returns the opcode. Otherwise, this returns ssa.OpcodeInvalid.  	//  	// Note: caller should be careful to avoid excessive allocation on opcodes slice. -	MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode +	MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode  	// AddRelocationInfo appends the relocation information for the function reference at the current buffer offset.  	AddRelocationInfo(funcRef ssa.FuncRef) @@ -126,10 +126,7 @@ type compiler struct {  	nextVRegID regalloc.VRegID  	// ssaValueToVRegs maps ssa.ValueID to regalloc.VReg.  	ssaValueToVRegs [] /* VRegID to */ regalloc.VReg -	// ssaValueDefinitions maps ssa.ValueID to its definition. -	ssaValueDefinitions []SSAValueDefinition -	// ssaValueRefCounts is a cached list obtained by ssa.Builder.ValueRefCounts(). -	ssaValueRefCounts []int +	ssaValuesInfo   []ssa.ValueInfo  	// returnVRegs is the list of virtual registers that store the return values.  	returnVRegs  []regalloc.VReg  	varEdges     [][2]regalloc.VReg @@ -206,15 +203,10 @@ func (c *compiler) setCurrentGroupID(gid ssa.InstructionGroupID) {  // assignVirtualRegisters assigns a virtual register to each ssa.ValueID Valid in the ssa.Builder.  func (c *compiler) assignVirtualRegisters() {  	builder := c.ssaBuilder -	refCounts := builder.ValueRefCounts() -	c.ssaValueRefCounts = refCounts +	c.ssaValuesInfo = builder.ValuesInfo() -	need := len(refCounts) -	if need >= len(c.ssaValueToVRegs) { -		c.ssaValueToVRegs = append(c.ssaValueToVRegs, make([]regalloc.VReg, need+1)...) 
-	} -	if need >= len(c.ssaValueDefinitions) { -		c.ssaValueDefinitions = append(c.ssaValueDefinitions, make([]SSAValueDefinition, need+1)...) +	if diff := len(c.ssaValuesInfo) - len(c.ssaValueToVRegs); diff > 0 { +		c.ssaValueToVRegs = append(c.ssaValueToVRegs, make([]regalloc.VReg, diff+1)...)  	}  	for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() { @@ -225,40 +217,26 @@ func (c *compiler) assignVirtualRegisters() {  			typ := p.Type()  			vreg := c.AllocateVReg(typ)  			c.ssaValueToVRegs[pid] = vreg -			c.ssaValueDefinitions[pid] = SSAValueDefinition{BlockParamValue: p, BlkParamVReg: vreg}  			c.ssaTypeOfVRegID[vreg.ID()] = p.Type()  		}  		// Assigns each value to a virtual register produced by instructions.  		for cur := blk.Root(); cur != nil; cur = cur.Next() {  			r, rs := cur.Returns() -			var N int  			if r.Valid() {  				id := r.ID()  				ssaTyp := r.Type()  				typ := r.Type()  				vReg := c.AllocateVReg(typ)  				c.ssaValueToVRegs[id] = vReg -				c.ssaValueDefinitions[id] = SSAValueDefinition{ -					Instr:    cur, -					N:        0, -					RefCount: refCounts[id], -				}  				c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp -				N++  			}  			for _, r := range rs {  				id := r.ID()  				ssaTyp := r.Type()  				vReg := c.AllocateVReg(ssaTyp)  				c.ssaValueToVRegs[id] = vReg -				c.ssaValueDefinitions[id] = SSAValueDefinition{ -					Instr:    cur, -					N:        N, -					RefCount: refCounts[id], -				}  				c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp -				N++  			}  		}  	} @@ -299,8 +277,12 @@ func (c *compiler) Init() {  }  // ValueDefinition implements Compiler.ValueDefinition. -func (c *compiler) ValueDefinition(value ssa.Value) *SSAValueDefinition { -	return &c.ssaValueDefinitions[value.ID()] +func (c *compiler) ValueDefinition(value ssa.Value) SSAValueDefinition { +	return SSAValueDefinition{ +		V:        value, +		Instr:    c.ssaBuilder.InstructionOfValue(value), +		RefCount: c.ssaValuesInfo[value.ID()].RefCount, +	}  }  // VRegOf implements Compiler.VRegOf. @@ -319,7 +301,7 @@ func (c *compiler) TypeOf(v regalloc.VReg) ssa.Type {  }  // MatchInstr implements Compiler.MatchInstr. -func (c *compiler) MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool { +func (c *compiler) MatchInstr(def SSAValueDefinition, opcode ssa.Opcode) bool {  	instr := def.Instr  	return def.IsFromInstr() &&  		instr.Opcode() == opcode && @@ -328,7 +310,7 @@ func (c *compiler) MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool {  }  // MatchInstrOneOf implements Compiler.MatchInstrOneOf. 
-func (c *compiler) MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode { +func (c *compiler) MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode {  	instr := def.Instr  	if !def.IsFromInstr() {  		return ssa.OpcodeInvalid diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler_lower.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler_lower.go index 80e65668a..735cfa3d3 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler_lower.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler_lower.go @@ -9,7 +9,7 @@ import (  func (c *compiler) Lower() {  	c.assignVirtualRegisters()  	c.mach.SetCurrentABI(c.GetFunctionABI(c.ssaBuilder.Signature())) -	c.mach.ExecutableContext().StartLoweringFunction(c.ssaBuilder.BlockIDMax()) +	c.mach.StartLoweringFunction(c.ssaBuilder.BlockIDMax())  	c.lowerBlocks()  } @@ -20,12 +20,11 @@ func (c *compiler) lowerBlocks() {  		c.lowerBlock(blk)  	} -	ectx := c.mach.ExecutableContext()  	// After lowering all blocks, we need to link adjacent blocks to layout one single instruction list.  	var prev ssa.BasicBlock  	for next := builder.BlockIteratorReversePostOrderBegin(); next != nil; next = builder.BlockIteratorReversePostOrderNext() {  		if prev != nil { -			ectx.LinkAdjacentBlocks(prev, next) +			c.mach.LinkAdjacentBlocks(prev, next)  		}  		prev = next  	} @@ -33,8 +32,7 @@ func (c *compiler) lowerBlocks() {  func (c *compiler) lowerBlock(blk ssa.BasicBlock) {  	mach := c.mach -	ectx := mach.ExecutableContext() -	ectx.StartBlock(blk) +	mach.StartBlock(blk)  	// We traverse the instructions in reverse order because we might want to lower multiple  	// instructions together. @@ -76,7 +74,7 @@ func (c *compiler) lowerBlock(blk ssa.BasicBlock) {  		default:  			mach.LowerInstr(cur)  		} -		ectx.FlushPendingInstructions() +		mach.FlushPendingInstructions()  	}  	// Finally, if this is the entry block, we have to insert copies of arguments from the real location to the VReg. @@ -84,7 +82,7 @@ func (c *compiler) lowerBlock(blk ssa.BasicBlock) {  		c.lowerFunctionArguments(blk)  	} -	ectx.EndBlock() +	mach.EndBlock()  }  // lowerBranches is called right after StartBlock and before any LowerInstr call if @@ -93,23 +91,24 @@ func (c *compiler) lowerBlock(blk ssa.BasicBlock) {  //  // See ssa.Instruction IsBranching, and the comment on ssa.BasicBlock.  
func (c *compiler) lowerBranches(br0, br1 *ssa.Instruction) { -	ectx := c.mach.ExecutableContext() +	mach := c.mach  	c.setCurrentGroupID(br0.GroupID())  	c.mach.LowerSingleBranch(br0) -	ectx.FlushPendingInstructions() +	mach.FlushPendingInstructions()  	if br1 != nil {  		c.setCurrentGroupID(br1.GroupID())  		c.mach.LowerConditionalBranch(br1) -		ectx.FlushPendingInstructions() +		mach.FlushPendingInstructions()  	}  	if br0.Opcode() == ssa.OpcodeJump { -		_, args, target := br0.BranchData() +		_, args, targetBlockID := br0.BranchData()  		argExists := len(args) != 0  		if argExists && br1 != nil {  			panic("BUG: critical edge split failed")  		} +		target := c.ssaBuilder.BasicBlock(targetBlockID)  		if argExists && target.ReturnBlock() {  			if len(args) > 0 {  				c.mach.LowerReturns(args) @@ -118,24 +117,25 @@ func (c *compiler) lowerBranches(br0, br1 *ssa.Instruction) {  			c.lowerBlockArguments(args, target)  		}  	} -	ectx.FlushPendingInstructions() +	mach.FlushPendingInstructions()  }  func (c *compiler) lowerFunctionArguments(entry ssa.BasicBlock) { -	ectx := c.mach.ExecutableContext() +	mach := c.mach  	c.tmpVals = c.tmpVals[:0] +	data := c.ssaBuilder.ValuesInfo()  	for i := 0; i < entry.Params(); i++ {  		p := entry.Param(i) -		if c.ssaValueRefCounts[p.ID()] > 0 { +		if data[p.ID()].RefCount > 0 {  			c.tmpVals = append(c.tmpVals, p)  		} else {  			// If the argument is not used, we can just pass an invalid value.  			c.tmpVals = append(c.tmpVals, ssa.ValueInvalid)  		}  	} -	c.mach.LowerParams(c.tmpVals) -	ectx.FlushPendingInstructions() +	mach.LowerParams(c.tmpVals) +	mach.FlushPendingInstructions()  }  // lowerBlockArguments lowers how to pass arguments to the given successor block. @@ -152,12 +152,12 @@ func (c *compiler) lowerBlockArguments(args []ssa.Value, succ ssa.BasicBlock) {  		src := args[i]  		dstReg := c.VRegOf(dst) -		srcDef := c.ssaValueDefinitions[src.ID()] -		if srcDef.IsFromInstr() && srcDef.Instr.Constant() { +		srcInstr := c.ssaBuilder.InstructionOfValue(src) +		if srcInstr != nil && srcInstr.Constant() {  			c.constEdges = append(c.constEdges, struct {  				cInst *ssa.Instruction  				dst   regalloc.VReg -			}{cInst: srcDef.Instr, dst: dstReg}) +			}{cInst: srcInstr, dst: dstReg})  		} else {  			srcReg := c.VRegOf(src)  			// Even when the src=dst, insert the move so that we can keep such registers keep-alive. diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go deleted file mode 100644 index 8e9571b20..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go +++ /dev/null @@ -1,221 +0,0 @@ -package backend - -import ( -	"fmt" -	"math" - -	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" -) - -type ExecutableContext interface { -	// StartLoweringFunction is called when the lowering of the given function is started. -	// maximumBlockID is the maximum value of ssa.BasicBlockID existing in the function. -	StartLoweringFunction(maximumBlockID ssa.BasicBlockID) - -	// LinkAdjacentBlocks is called after finished lowering all blocks in order to create one single instruction list. -	LinkAdjacentBlocks(prev, next ssa.BasicBlock) - -	// StartBlock is called when the compilation of the given block is started. 
-	// The order of this being called is the reverse post order of the ssa.BasicBlock(s) as we iterate with -	// ssa.Builder BlockIteratorReversePostOrderBegin and BlockIteratorReversePostOrderEnd. -	StartBlock(ssa.BasicBlock) - -	// EndBlock is called when the compilation of the current block is finished. -	EndBlock() - -	// FlushPendingInstructions flushes the pending instructions to the buffer. -	// This will be called after the lowering of each SSA Instruction. -	FlushPendingInstructions() -} - -type ExecutableContextT[Instr any] struct { -	CurrentSSABlk ssa.BasicBlock - -	// InstrPool is the InstructionPool of instructions. -	InstructionPool wazevoapi.Pool[Instr] -	asNop           func(*Instr) -	setNext         func(*Instr, *Instr) -	setPrev         func(*Instr, *Instr) - -	// RootInstr is the root instruction of the executable. -	RootInstr         *Instr -	labelPositionPool wazevoapi.Pool[LabelPosition[Instr]] -	NextLabel         Label -	// LabelPositions maps a label to the instructions of the region which the label represents. -	LabelPositions     []*LabelPosition[Instr] -	OrderedBlockLabels []*LabelPosition[Instr] - -	// PerBlockHead and PerBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock. -	PerBlockHead, PerBlockEnd *Instr -	// PendingInstructions are the instructions which are not yet emitted into the instruction list. -	PendingInstructions []*Instr - -	// SsaBlockIDToLabels maps an SSA block ID to the label. -	SsaBlockIDToLabels []Label -} - -func NewExecutableContextT[Instr any]( -	resetInstruction func(*Instr), -	setNext func(*Instr, *Instr), -	setPrev func(*Instr, *Instr), -	asNop func(*Instr), -) *ExecutableContextT[Instr] { -	return &ExecutableContextT[Instr]{ -		InstructionPool:   wazevoapi.NewPool[Instr](resetInstruction), -		asNop:             asNop, -		setNext:           setNext, -		setPrev:           setPrev, -		labelPositionPool: wazevoapi.NewPool[LabelPosition[Instr]](resetLabelPosition[Instr]), -		NextLabel:         LabelInvalid, -	} -} - -func resetLabelPosition[T any](l *LabelPosition[T]) { -	*l = LabelPosition[T]{} -} - -// StartLoweringFunction implements ExecutableContext. -func (e *ExecutableContextT[Instr]) StartLoweringFunction(max ssa.BasicBlockID) { -	imax := int(max) -	if len(e.SsaBlockIDToLabels) <= imax { -		// Eagerly allocate labels for the blocks since the underlying slice will be used for the next iteration. -		e.SsaBlockIDToLabels = append(e.SsaBlockIDToLabels, make([]Label, imax+1)...) -	} -} - -func (e *ExecutableContextT[Instr]) StartBlock(blk ssa.BasicBlock) { -	e.CurrentSSABlk = blk - -	l := e.SsaBlockIDToLabels[e.CurrentSSABlk.ID()] -	if l == LabelInvalid { -		l = e.AllocateLabel() -		e.SsaBlockIDToLabels[blk.ID()] = l -	} - -	end := e.allocateNop0() -	e.PerBlockHead, e.PerBlockEnd = end, end - -	labelPos := e.GetOrAllocateLabelPosition(l) -	e.OrderedBlockLabels = append(e.OrderedBlockLabels, labelPos) -	labelPos.Begin, labelPos.End = end, end -	labelPos.SB = blk -} - -// EndBlock implements ExecutableContext. -func (e *ExecutableContextT[T]) EndBlock() { -	// Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions. 
-	e.insertAtPerBlockHead(e.allocateNop0()) - -	l := e.SsaBlockIDToLabels[e.CurrentSSABlk.ID()] -	e.LabelPositions[l].Begin = e.PerBlockHead - -	if e.CurrentSSABlk.EntryBlock() { -		e.RootInstr = e.PerBlockHead -	} -} - -func (e *ExecutableContextT[T]) insertAtPerBlockHead(i *T) { -	if e.PerBlockHead == nil { -		e.PerBlockHead = i -		e.PerBlockEnd = i -		return -	} -	e.setNext(i, e.PerBlockHead) -	e.setPrev(e.PerBlockHead, i) -	e.PerBlockHead = i -} - -// FlushPendingInstructions implements ExecutableContext. -func (e *ExecutableContextT[T]) FlushPendingInstructions() { -	l := len(e.PendingInstructions) -	if l == 0 { -		return -	} -	for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order. -		e.insertAtPerBlockHead(e.PendingInstructions[i]) -	} -	e.PendingInstructions = e.PendingInstructions[:0] -} - -func (e *ExecutableContextT[T]) Reset() { -	e.labelPositionPool.Reset() -	e.InstructionPool.Reset() -	for i := range e.LabelPositions { -		e.LabelPositions[i] = nil -	} -	e.PendingInstructions = e.PendingInstructions[:0] -	e.OrderedBlockLabels = e.OrderedBlockLabels[:0] -	e.RootInstr = nil -	e.SsaBlockIDToLabels = e.SsaBlockIDToLabels[:0] -	e.PerBlockHead, e.PerBlockEnd = nil, nil -	e.NextLabel = LabelInvalid -} - -// AllocateLabel allocates an unused label. -func (e *ExecutableContextT[T]) AllocateLabel() Label { -	e.NextLabel++ -	return e.NextLabel -} - -func (e *ExecutableContextT[T]) GetOrAllocateLabelPosition(l Label) *LabelPosition[T] { -	if len(e.LabelPositions) <= int(l) { -		e.LabelPositions = append(e.LabelPositions, make([]*LabelPosition[T], int(l)+1-len(e.LabelPositions))...) -	} -	ret := e.LabelPositions[l] -	if ret == nil { -		ret = e.labelPositionPool.Allocate() -		ret.L = l -		e.LabelPositions[l] = ret -	} -	return ret -} - -func (e *ExecutableContextT[T]) GetOrAllocateSSABlockLabel(blk ssa.BasicBlock) Label { -	if blk.ReturnBlock() { -		return LabelReturn -	} -	l := e.SsaBlockIDToLabels[blk.ID()] -	if l == LabelInvalid { -		l = e.AllocateLabel() -		e.SsaBlockIDToLabels[blk.ID()] = l -	} -	return l -} - -func (e *ExecutableContextT[T]) allocateNop0() *T { -	i := e.InstructionPool.Allocate() -	e.asNop(i) -	return i -} - -// LinkAdjacentBlocks implements backend.Machine. -func (e *ExecutableContextT[T]) LinkAdjacentBlocks(prev, next ssa.BasicBlock) { -	prevLabelPos := e.LabelPositions[e.GetOrAllocateSSABlockLabel(prev)] -	nextLabelPos := e.LabelPositions[e.GetOrAllocateSSABlockLabel(next)] -	e.setNext(prevLabelPos.End, nextLabelPos.Begin) -} - -// LabelPosition represents the regions of the generated code which the label represents. -type LabelPosition[Instr any] struct { -	SB           ssa.BasicBlock -	L            Label -	Begin, End   *Instr -	BinaryOffset int64 -} - -// Label represents a position in the generated code which is either -// a real instruction or the constant InstructionPool (e.g. jump tables). -// -// This is exactly the same as the traditional "label" in assembly code. -type Label uint32 - -const ( -	LabelInvalid Label = 0 -	LabelReturn  Label = math.MaxUint32 -) - -// String implements backend.Machine. 
-func (l Label) String() string { -	return fmt.Sprintf("L%d", l) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go index 751050aff..96f035e58 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go @@ -14,7 +14,6 @@ var calleeSavedVRegs = []regalloc.VReg{  // CompileGoFunctionTrampoline implements backend.Machine.  func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte { -	ectx := m.ectx  	argBegin := 1 // Skips exec context by default.  	if needModuleContextPtr {  		argBegin++ @@ -25,7 +24,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *  	m.currentABI = abi  	cur := m.allocateNop() -	ectx.RootInstr = cur +	m.rootInstr = cur  	// Execution context is always the first argument.  	execCtrPtr := raxVReg @@ -272,7 +271,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *  	cur = m.revertRBPRSP(cur)  	linkInstr(cur, m.allocateInstr().asRet()) -	m.encodeWithoutSSA(ectx.RootInstr) +	m.encodeWithoutSSA(m.rootInstr)  	return m.c.Buf()  } @@ -347,10 +346,8 @@ var stackGrowSaveVRegs = []regalloc.VReg{  // CompileStackGrowCallSequence implements backend.Machine.  func (m *machine) CompileStackGrowCallSequence() []byte { -	ectx := m.ectx -  	cur := m.allocateNop() -	ectx.RootInstr = cur +	m.rootInstr = cur  	cur = m.setupRBPRSP(cur) @@ -379,7 +376,7 @@ func (m *machine) CompileStackGrowCallSequence() []byte {  	cur = m.revertRBPRSP(cur)  	linkInstr(cur, m.allocateInstr().asRet()) -	m.encodeWithoutSSA(ectx.RootInstr) +	m.encodeWithoutSSA(m.rootInstr)  	return m.c.Buf()  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go index d27e79c0e..6a3e58f51 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go @@ -17,16 +17,6 @@ type instruction struct {  	kind                instructionKind  } -// Next implements regalloc.Instr. -func (i *instruction) Next() regalloc.Instr { -	return i.next -} - -// Prev implements regalloc.Instr. -func (i *instruction) Prev() regalloc.Instr { -	return i.prev -} -  // IsCall implements regalloc.Instr.  func (i *instruction) IsCall() bool { return i.kind == call } @@ -36,9 +26,6 @@ func (i *instruction) IsIndirectCall() bool { return i.kind == callIndirect }  // IsReturn implements regalloc.Instr.  func (i *instruction) IsReturn() bool { return i.kind == ret } -// AddedBeforeRegAlloc implements regalloc.Instr. -func (i *instruction) AddedBeforeRegAlloc() bool { return i.addedBeforeRegAlloc } -  // String implements regalloc.Instr.  
func (i *instruction) String() string {  	switch i.kind { @@ -651,26 +638,14 @@ func resetInstruction(i *instruction) {  	*i = instruction{}  } -func setNext(i *instruction, next *instruction) { -	i.next = next -} - -func setPrev(i *instruction, prev *instruction) { -	i.prev = prev -} - -func asNop(i *instruction) { -	i.kind = nop0 -} - -func (i *instruction) asNop0WithLabel(label backend.Label) *instruction { //nolint +func (i *instruction) asNop0WithLabel(label label) *instruction { //nolint  	i.kind = nop0  	i.u1 = uint64(label)  	return i  } -func (i *instruction) nop0Label() backend.Label { -	return backend.Label(i.u1) +func (i *instruction) nop0Label() label { +	return label(i.u1)  }  type instructionKind byte @@ -1161,7 +1136,7 @@ func (i *instruction) asJmp(target operand) *instruction {  	return i  } -func (i *instruction) jmpLabel() backend.Label { +func (i *instruction) jmpLabel() label {  	switch i.kind {  	case jmp, jmpIf, lea, xmmUnaryRmR:  		return i.op1.label() diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go index bee673d25..befe8c643 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go @@ -130,9 +130,9 @@ func (m *machine) lowerAddendsToAmode(x, y addend, offBase uint32) *amode {  	}  } -func (m *machine) lowerAddend(x *backend.SSAValueDefinition) addend { -	if x.IsFromBlockParam() { -		return addend{x.BlkParamVReg, 0, 0} +func (m *machine) lowerAddend(x backend.SSAValueDefinition) addend { +	if !x.IsFromInstr() { +		return addend{m.c.VRegOf(x.V), 0, 0}  	}  	// Ensure the addend is not referenced in multiple places; we will discard nested Iadds.  	op := m.c.MatchInstrOneOf(x, addendsMatchOpcodes[:]) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go index 61ae6f406..aeeb6b645 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go @@ -16,18 +16,13 @@ import (  // NewBackend returns a new backend for arm64.  
func NewBackend() backend.Machine { -	ectx := backend.NewExecutableContextT[instruction]( -		resetInstruction, -		setNext, -		setPrev, -		asNop, -	) -	return &machine{ -		ectx:                                ectx, +	m := &machine{  		cpuFeatures:                         platform.CpuFeatures, -		regAlloc:                            regalloc.NewAllocator(regInfo), +		regAlloc:                            regalloc.NewAllocator[*instruction, *labelPosition, *regAllocFn](regInfo),  		spillSlots:                          map[regalloc.VRegID]int64{},  		amodePool:                           wazevoapi.NewPool[amode](nil), +		labelPositionPool:                   wazevoapi.NewIDedPool[labelPosition](resetLabelPosition), +		instrPool:                           wazevoapi.NewPool[instruction](resetInstruction),  		constSwizzleMaskConstIndex:          -1,  		constSqmulRoundSatIndex:             -1,  		constI8x16SHLMaskTableIndex:         -1, @@ -41,23 +36,46 @@ func NewBackend() backend.Machine {  		constExtAddPairwiseI16x8uMask1Index: -1,  		constExtAddPairwiseI16x8uMask2Index: -1,  	} +	m.regAllocFn.m = m +	return m  }  type (  	// machine implements backend.Machine for amd64.  	machine struct {  		c                        backend.Compiler -		ectx                     *backend.ExecutableContextT[instruction]  		stackBoundsCheckDisabled bool +		instrPool wazevoapi.Pool[instruction]  		amodePool wazevoapi.Pool[amode]  		cpuFeatures platform.CpuFeatureFlags -		regAlloc        regalloc.Allocator -		regAllocFn      *backend.RegAllocFunction[*instruction, *machine] +		regAlloc        regalloc.Allocator[*instruction, *labelPosition, *regAllocFn] +		regAllocFn      regAllocFn  		regAllocStarted bool +		// labelPositionPool is the pool of labelPosition. The id is the label where +		// if the label is less than the maxSSABlockID, it's the ssa.BasicBlockID. +		labelPositionPool wazevoapi.IDedPool[labelPosition] +		// nextLabel is the next label to be allocated. The first free label comes after maxSSABlockID +		// so that we can have an identical label for the SSA block ID, which is useful for debugging. +		nextLabel label +		// rootInstr is the first instruction of the function. +		rootInstr *instruction +		// currentLabelPos is the currently-compiled ssa.BasicBlock's labelPosition. +		currentLabelPos *labelPosition +		// orderedSSABlockLabelPos is the ordered list of labelPosition in the generated code for each ssa.BasicBlock. +		orderedSSABlockLabelPos []*labelPosition +		// returnLabelPos is the labelPosition for the return block. +		returnLabelPos labelPosition +		// perBlockHead and perBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock. +		perBlockHead, perBlockEnd *instruction +		// pendingInstructions are the instructions which are not yet emitted into the instruction list. +		pendingInstructions []*instruction +		// maxSSABlockID is the maximum ssa.BasicBlockID in the current function. +		maxSSABlockID label +  		spillSlotSize int64  		spillSlots    map[regalloc.VRegID]int64  		currentABI    *backend.FunctionABI @@ -67,8 +85,11 @@ type (  		labelResolutionPends []labelResolutionPend +		// jmpTableTargets holds the labels of the jump table targets.  		jmpTableTargets [][]uint32 -		consts          []_const +		// jmpTableTargetNext is the index to the jmpTableTargets slice to be used for the next jump table. 
+		jmpTableTargetsNext int +		consts              []_const  		constSwizzleMaskConstIndex, constSqmulRoundSatIndex,  		constI8x16SHLMaskTableIndex, constI8x16LogicalSHRMaskTableIndex, @@ -79,9 +100,10 @@ type (  	}  	_const struct { -		lo, hi uint64 -		_var   []byte -		label  *labelPosition +		lo, hi   uint64 +		_var     []byte +		label    label +		labelPos *labelPosition  	}  	labelResolutionPend struct { @@ -90,22 +112,73 @@ type (  		// imm32Offset is the offset of the last 4 bytes of the instruction.  		imm32Offset int64  	} +) -	labelPosition = backend.LabelPosition[instruction] +type ( +	// label represents a position in the generated code which is either +	// a real instruction or the constant InstructionPool (e.g. jump tables). +	// +	// This is exactly the same as the traditional "label" in assembly code. +	label uint32 + +	// labelPosition represents the regions of the generated code which the label represents. +	// This implements regalloc.Block. +	labelPosition struct { +		// sb is not nil if this corresponds to a ssa.BasicBlock. +		sb ssa.BasicBlock +		// cur is used to walk through the instructions in the block during the register allocation. +		cur, +		// begin and end are the first and last instructions of the block. +		begin, end *instruction +		// binaryOffset is the offset in the binary where the label is located. +		binaryOffset int64 +	}  ) -func (m *machine) getOrAllocateConstLabel(i *int, _var []byte) backend.Label { +// String implements backend.Machine. +func (l label) String() string { +	return fmt.Sprintf("L%d", l) +} + +func resetLabelPosition(l *labelPosition) { +	*l = labelPosition{} +} + +const labelReturn = math.MaxUint32 + +func ssaBlockLabel(sb ssa.BasicBlock) label { +	if sb.ReturnBlock() { +		return labelReturn +	} +	return label(sb.ID()) +} + +// getOrAllocateSSABlockLabelPosition returns the labelPosition for the given basic block. +func (m *machine) getOrAllocateSSABlockLabelPosition(sb ssa.BasicBlock) *labelPosition { +	if sb.ReturnBlock() { +		m.returnLabelPos.sb = sb +		return &m.returnLabelPos +	} + +	l := ssaBlockLabel(sb) +	pos := m.labelPositionPool.GetOrAllocate(int(l)) +	pos.sb = sb +	return pos +} + +func (m *machine) getOrAllocateConstLabel(i *int, _var []byte) label {  	index := *i  	if index == -1 { -		label := m.allocateLabel() +		l, pos := m.allocateLabel()  		index = len(m.consts)  		m.consts = append(m.consts, _const{ -			_var:  _var, -			label: label, +			_var:     _var, +			label:    l, +			labelPos: pos,  		})  		*i = index  	} -	return m.consts[index].label.L +	return m.consts[index].label  }  // Reset implements backend.Machine. @@ -120,18 +193,20 @@ func (m *machine) Reset() {  	}  	m.stackBoundsCheckDisabled = false -	m.ectx.Reset() - -	m.regAllocFn.Reset()  	m.regAlloc.Reset() +	m.labelPositionPool.Reset() +	m.instrPool.Reset()  	m.regAllocStarted = false  	m.clobberedRegs = m.clobberedRegs[:0]  	m.spillSlotSize = 0  	m.maxRequiredStackSizeForCalls = 0 +	m.perBlockHead, m.perBlockEnd, m.rootInstr = nil, nil, nil +	m.pendingInstructions = m.pendingInstructions[:0] +	m.orderedSSABlockLabelPos = m.orderedSSABlockLabelPos[:0]  	m.amodePool.Reset() -	m.jmpTableTargets = m.jmpTableTargets[:0] +	m.jmpTableTargetsNext = 0  	m.constSwizzleMaskConstIndex = -1  	m.constSqmulRoundSatIndex = -1  	m.constI8x16SHLMaskTableIndex = -1 @@ -146,8 +221,63 @@ func (m *machine) Reset() {  	m.constExtAddPairwiseI16x8uMask2Index = -1  } -// ExecutableContext implements backend.Machine. 
-func (m *machine) ExecutableContext() backend.ExecutableContext { return m.ectx } +// StartLoweringFunction implements backend.Machine StartLoweringFunction. +func (m *machine) StartLoweringFunction(maxBlockID ssa.BasicBlockID) { +	m.maxSSABlockID = label(maxBlockID) +	m.nextLabel = label(maxBlockID) + 1 +} + +// LinkAdjacentBlocks implements backend.Machine. +func (m *machine) LinkAdjacentBlocks(prev, next ssa.BasicBlock) { +	prevPos, nextPos := m.getOrAllocateSSABlockLabelPosition(prev), m.getOrAllocateSSABlockLabelPosition(next) +	prevPos.end.next = nextPos.begin +} + +// StartBlock implements backend.Machine. +func (m *machine) StartBlock(blk ssa.BasicBlock) { +	m.currentLabelPos = m.getOrAllocateSSABlockLabelPosition(blk) +	labelPos := m.currentLabelPos +	end := m.allocateNop() +	m.perBlockHead, m.perBlockEnd = end, end +	labelPos.begin, labelPos.end = end, end +	m.orderedSSABlockLabelPos = append(m.orderedSSABlockLabelPos, labelPos) +} + +// EndBlock implements ExecutableContext. +func (m *machine) EndBlock() { +	// Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions. +	m.insertAtPerBlockHead(m.allocateNop()) + +	m.currentLabelPos.begin = m.perBlockHead + +	if m.currentLabelPos.sb.EntryBlock() { +		m.rootInstr = m.perBlockHead +	} +} + +func (m *machine) insertAtPerBlockHead(i *instruction) { +	if m.perBlockHead == nil { +		m.perBlockHead = i +		m.perBlockEnd = i +		return +	} + +	i.next = m.perBlockHead +	m.perBlockHead.prev = i +	m.perBlockHead = i +} + +// FlushPendingInstructions implements backend.Machine. +func (m *machine) FlushPendingInstructions() { +	l := len(m.pendingInstructions) +	if l == 0 { +		return +	} +	for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order. +		m.insertAtPerBlockHead(m.pendingInstructions[i]) +	} +	m.pendingInstructions = m.pendingInstructions[:0] +}  // DisableStackCheck implements backend.Machine.  func (m *machine) DisableStackCheck() { m.stackBoundsCheckDisabled = true } @@ -155,23 +285,17 @@ func (m *machine) DisableStackCheck() { m.stackBoundsCheckDisabled = true }  // SetCompiler implements backend.Machine.  func (m *machine) SetCompiler(c backend.Compiler) {  	m.c = c -	m.regAllocFn = backend.NewRegAllocFunction[*instruction, *machine](m, c.SSABuilder(), c) +	m.regAllocFn.ssaB = c.SSABuilder()  }  // SetCurrentABI implements backend.Machine. -func (m *machine) SetCurrentABI(abi *backend.FunctionABI) { -	m.currentABI = abi -} +func (m *machine) SetCurrentABI(abi *backend.FunctionABI) { m.currentABI = abi }  // RegAlloc implements backend.Machine.  func (m *machine) RegAlloc() {  	rf := m.regAllocFn -	for _, pos := range m.ectx.OrderedBlockLabels { -		rf.AddBlock(pos.SB, pos.L, pos.Begin, pos.End) -	} -  	m.regAllocStarted = true -	m.regAlloc.DoAllocation(rf) +	m.regAlloc.DoAllocation(&rf)  	// Now that we know the final spill slot size, we must align spillSlotSize to 16 bytes.  	m.spillSlotSize = (m.spillSlotSize + 15) &^ 15  } @@ -184,49 +308,54 @@ func (m *machine) InsertReturn() {  // LowerSingleBranch implements backend.Machine.  
func (m *machine) LowerSingleBranch(b *ssa.Instruction) { -	ectx := m.ectx  	switch b.Opcode() {  	case ssa.OpcodeJump: -		_, _, targetBlk := b.BranchData() +		_, _, targetBlkID := b.BranchData()  		if b.IsFallthroughJump() {  			return  		}  		jmp := m.allocateInstr() -		target := ectx.GetOrAllocateSSABlockLabel(targetBlk) -		if target == backend.LabelReturn { +		target := ssaBlockLabel(m.c.SSABuilder().BasicBlock(targetBlkID)) +		if target == labelReturn {  			jmp.asRet()  		} else {  			jmp.asJmp(newOperandLabel(target))  		}  		m.insert(jmp)  	case ssa.OpcodeBrTable: -		index, target := b.BrTableData() -		m.lowerBrTable(index, target) +		index, targetBlkIDs := b.BrTableData() +		m.lowerBrTable(index, targetBlkIDs)  	default:  		panic("BUG: unexpected branch opcode" + b.Opcode().String())  	}  } -func (m *machine) addJmpTableTarget(targets []ssa.BasicBlock) (index int) { -	// TODO: reuse the slice! -	labels := make([]uint32, len(targets)) -	for j, target := range targets { -		labels[j] = uint32(m.ectx.GetOrAllocateSSABlockLabel(target)) +func (m *machine) addJmpTableTarget(targets ssa.Values) (index int) { +	if m.jmpTableTargetsNext == len(m.jmpTableTargets) { +		m.jmpTableTargets = append(m.jmpTableTargets, make([]uint32, 0, len(targets.View()))) +	} + +	index = m.jmpTableTargetsNext +	m.jmpTableTargetsNext++ +	m.jmpTableTargets[index] = m.jmpTableTargets[index][:0] +	for _, targetBlockID := range targets.View() { +		target := m.c.SSABuilder().BasicBlock(ssa.BasicBlockID(targetBlockID)) +		m.jmpTableTargets[index] = append(m.jmpTableTargets[index], uint32(ssaBlockLabel(target)))  	} -	index = len(m.jmpTableTargets) -	m.jmpTableTargets = append(m.jmpTableTargets, labels)  	return  }  var condBranchMatches = [...]ssa.Opcode{ssa.OpcodeIcmp, ssa.OpcodeFcmp} -func (m *machine) lowerBrTable(index ssa.Value, targets []ssa.BasicBlock) { +func (m *machine) lowerBrTable(index ssa.Value, targets ssa.Values) {  	_v := m.getOperand_Reg(m.c.ValueDefinition(index))  	v := m.copyToTmp(_v.reg()) +	targetCount := len(targets.View()) +  	// First, we need to do the bounds check.  	maxIndex := m.c.AllocateVReg(ssa.TypeI32) -	m.lowerIconst(maxIndex, uint64(len(targets)-1), false) +	m.lowerIconst(maxIndex, uint64(targetCount-1), false)  	cmp := m.allocateInstr().asCmpRmiR(true, newOperandReg(maxIndex), v, false)  	m.insert(cmp) @@ -255,23 +384,22 @@ func (m *machine) lowerBrTable(index ssa.Value, targets []ssa.BasicBlock) {  	jmpTable := m.allocateInstr()  	targetSliceIndex := m.addJmpTableTarget(targets) -	jmpTable.asJmpTableSequence(targetSliceIndex, len(targets)) +	jmpTable.asJmpTableSequence(targetSliceIndex, targetCount)  	m.insert(jmpTable)  }  // LowerConditionalBranch implements backend.Machine.  
func (m *machine) LowerConditionalBranch(b *ssa.Instruction) { -	exctx := m.ectx -	cval, args, targetBlk := b.BranchData() +	cval, args, targetBlkID := b.BranchData()  	if len(args) > 0 {  		panic(fmt.Sprintf(  			"conditional branch shouldn't have args; likely a bug in critical edge splitting: from %s to %s", -			exctx.CurrentSSABlk, -			targetBlk, +			m.currentLabelPos.sb, +			targetBlkID,  		))  	} -	target := exctx.GetOrAllocateSSABlockLabel(targetBlk) +	target := ssaBlockLabel(m.c.SSABuilder().BasicBlock(targetBlkID))  	cvalDef := m.c.ValueDefinition(cval)  	switch m.c.MatchInstrOneOf(cvalDef, condBranchMatches[:]) { @@ -1272,9 +1400,9 @@ func (m *machine) lowerVconst(dst regalloc.VReg, lo, hi uint64) {  	}  	load := m.allocateInstr() -	constLabel := m.allocateLabel() -	m.consts = append(m.consts, _const{label: constLabel, lo: lo, hi: hi}) -	load.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(constLabel.L)), dst) +	l, pos := m.allocateLabel() +	m.consts = append(m.consts, _const{label: l, labelPos: pos, lo: lo, hi: hi}) +	load.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(l)), dst)  	m.insert(load)  } @@ -1473,21 +1601,24 @@ func (m *machine) lowerExitIfTrueWithCode(execCtx regalloc.VReg, cond ssa.Value,  	jmpIf.asJmpIf(condFromSSAIntCmpCond(c).invert(), newOperandLabel(l))  } -func (m *machine) tryLowerBandToFlag(x, y *backend.SSAValueDefinition) (ok bool) { -	var target *backend.SSAValueDefinition +func (m *machine) tryLowerBandToFlag(x, y backend.SSAValueDefinition) (ok bool) { +	var target backend.SSAValueDefinition +	var got bool  	if x.IsFromInstr() && x.Instr.Constant() && x.Instr.ConstantVal() == 0 {  		if m.c.MatchInstr(y, ssa.OpcodeBand) {  			target = y +			got = true  		}  	}  	if y.IsFromInstr() && y.Instr.Constant() && y.Instr.ConstantVal() == 0 {  		if m.c.MatchInstr(x, ssa.OpcodeBand) {  			target = x +			got = true  		}  	} -	if target == nil { +	if !got {  		return false  	} @@ -1522,7 +1653,7 @@ func (m *machine) allocateExitInstructions(execCtx, exitCodeReg regalloc.VReg) (  	return  } -func (m *machine) lowerExitWithCode(execCtx regalloc.VReg, code wazevoapi.ExitCode) (afterLabel backend.Label) { +func (m *machine) lowerExitWithCode(execCtx regalloc.VReg, code wazevoapi.ExitCode) (afterLabel label) {  	exitCodeReg := rbpVReg  	saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtx, exitCodeReg) @@ -1819,9 +1950,9 @@ func (m *machine) lowerCall(si *ssa.Instruction) {  // callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the  // caller side of the function call. -func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, stackSlotSize int64) { +func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def backend.SSAValueDefinition, stackSlotSize int64) {  	arg := &a.Args[argIndex] -	if def != nil && def.IsFromInstr() { +	if def.IsFromInstr() {  		// Constant instructions are inlined.  		if inst := def.Instr; inst.Constant() {  			m.insertLoadConstant(inst, reg) @@ -1904,25 +2035,20 @@ func (m *machine) InsertMove(dst, src regalloc.VReg, typ ssa.Type) {  // Format implements backend.Machine.  
func (m *machine) Format() string { -	ectx := m.ectx -	begins := map[*instruction]backend.Label{} -	for _, pos := range ectx.LabelPositions { +	begins := map[*instruction]label{} +	for l := label(0); l < m.nextLabel; l++ { +		pos := m.labelPositionPool.Get(int(l))  		if pos != nil { -			begins[pos.Begin] = pos.L +			begins[pos.begin] = l  		}  	} -	irBlocks := map[backend.Label]ssa.BasicBlockID{} -	for i, l := range ectx.SsaBlockIDToLabels { -		irBlocks[l] = ssa.BasicBlockID(i) -	} -  	var lines []string -	for cur := ectx.RootInstr; cur != nil; cur = cur.next { +	for cur := m.rootInstr; cur != nil; cur = cur.next {  		if l, ok := begins[cur]; ok {  			var labelStr string -			if blkID, ok := irBlocks[l]; ok { -				labelStr = fmt.Sprintf("%s (SSA Block: %s):", l, blkID) +			if l <= m.maxSSABlockID { +				labelStr = fmt.Sprintf("%s (SSA Block: blk%d):", l, l)  			} else {  				labelStr = fmt.Sprintf("%s:", l)  			} @@ -1935,9 +2061,9 @@ func (m *machine) Format() string {  	}  	for _, vc := range m.consts {  		if vc._var == nil { -			lines = append(lines, fmt.Sprintf("%s: const [%d %d]", vc.label.L, vc.lo, vc.hi)) +			lines = append(lines, fmt.Sprintf("%s: const [%d %d]", vc.label, vc.lo, vc.hi))  		} else { -			lines = append(lines, fmt.Sprintf("%s: const %#x", vc.label.L, vc._var)) +			lines = append(lines, fmt.Sprintf("%s: const %#x", vc.label, vc._var))  		}  	}  	return "\n" + strings.Join(lines, "\n") + "\n" @@ -1945,18 +2071,14 @@ func (m *machine) Format() string {  func (m *machine) encodeWithoutSSA(root *instruction) {  	m.labelResolutionPends = m.labelResolutionPends[:0] -	ectx := m.ectx -  	bufPtr := m.c.BufPtr()  	for cur := root; cur != nil; cur = cur.next {  		offset := int64(len(*bufPtr))  		if cur.kind == nop0 {  			l := cur.nop0Label() -			if int(l) >= len(ectx.LabelPositions) { -				continue -			} -			if pos := ectx.LabelPositions[l]; pos != nil { -				pos.BinaryOffset = offset +			pos := m.labelPositionPool.Get(int(l)) +			if pos != nil { +				pos.binaryOffset = offset  			}  		} @@ -1973,7 +2095,7 @@ func (m *machine) encodeWithoutSSA(root *instruction) {  		switch p.instr.kind {  		case jmp, jmpIf, lea:  			target := p.instr.jmpLabel() -			targetOffset := ectx.LabelPositions[target].BinaryOffset +			targetOffset := m.labelPositionPool.Get(int(target)).binaryOffset  			imm32Offset := p.imm32Offset  			jmpOffset := int32(targetOffset - (p.imm32Offset + 4)) // +4 because RIP points to the next instruction.  			binary.LittleEndian.PutUint32((*bufPtr)[imm32Offset:], uint32(jmpOffset)) @@ -1985,33 +2107,33 @@ func (m *machine) encodeWithoutSSA(root *instruction) {  // Encode implements backend.Machine Encode.  
func (m *machine) Encode(ctx context.Context) (err error) { -	ectx := m.ectx  	bufPtr := m.c.BufPtr()  	var fn string  	var fnIndex int -	var labelToSSABlockID map[backend.Label]ssa.BasicBlockID +	var labelPosToLabel map[*labelPosition]label  	if wazevoapi.PerfMapEnabled {  		fn = wazevoapi.GetCurrentFunctionName(ctx) -		labelToSSABlockID = make(map[backend.Label]ssa.BasicBlockID) -		for i, l := range ectx.SsaBlockIDToLabels { -			labelToSSABlockID[l] = ssa.BasicBlockID(i) +		labelPosToLabel = make(map[*labelPosition]label) +		for i := 0; i <= m.labelPositionPool.MaxIDEncountered(); i++ { +			pos := m.labelPositionPool.Get(i) +			labelPosToLabel[pos] = label(i)  		}  		fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx)  	}  	m.labelResolutionPends = m.labelResolutionPends[:0] -	for _, pos := range ectx.OrderedBlockLabels { +	for _, pos := range m.orderedSSABlockLabelPos {  		offset := int64(len(*bufPtr)) -		pos.BinaryOffset = offset -		for cur := pos.Begin; cur != pos.End.next; cur = cur.next { +		pos.binaryOffset = offset +		for cur := pos.begin; cur != pos.end.next; cur = cur.next {  			offset := int64(len(*bufPtr))  			switch cur.kind {  			case nop0:  				l := cur.nop0Label() -				if pos := ectx.LabelPositions[l]; pos != nil { -					pos.BinaryOffset = offset +				if pos := m.labelPositionPool.Get(int(l)); pos != nil { +					pos.binaryOffset = offset  				}  			case sourceOffsetInfo:  				m.c.AddSourceOffsetInfo(offset, cur.sourceOffsetInfo()) @@ -2026,22 +2148,16 @@ func (m *machine) Encode(ctx context.Context) (err error) {  		}  		if wazevoapi.PerfMapEnabled { -			l := pos.L -			var labelStr string -			if blkID, ok := labelToSSABlockID[l]; ok { -				labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID) -			} else { -				labelStr = l.String() -			} +			l := labelPosToLabel[pos]  			size := int64(len(*bufPtr)) - offset -			wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr)) +			wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, l))  		}  	}  	for i := range m.consts {  		offset := int64(len(*bufPtr))  		vc := &m.consts[i] -		vc.label.BinaryOffset = offset +		vc.labelPos.binaryOffset = offset  		if vc._var == nil {  			lo, hi := vc.lo, vc.hi  			m.c.Emit8Bytes(lo) @@ -2059,7 +2175,7 @@ func (m *machine) Encode(ctx context.Context) (err error) {  		switch p.instr.kind {  		case jmp, jmpIf, lea, xmmUnaryRmR:  			target := p.instr.jmpLabel() -			targetOffset := ectx.LabelPositions[target].BinaryOffset +			targetOffset := m.labelPositionPool.Get(int(target)).binaryOffset  			imm32Offset := p.imm32Offset  			jmpOffset := int32(targetOffset - (p.imm32Offset + 4)) // +4 because RIP points to the next instruction.  			binary.LittleEndian.PutUint32(buf[imm32Offset:], uint32(jmpOffset)) @@ -2068,7 +2184,7 @@ func (m *machine) Encode(ctx context.Context) (err error) {  			// Each entry is the offset from the beginning of the jmpTableIsland instruction in 8 bytes.  			targets := m.jmpTableTargets[p.instr.u1]  			for i, l := range targets { -				targetOffset := ectx.LabelPositions[backend.Label(l)].BinaryOffset +				targetOffset := m.labelPositionPool.Get(int(l)).binaryOffset  				jmpOffset := targetOffset - tableBegin  				binary.LittleEndian.PutUint64(buf[tableBegin+int64(i)*8:], uint64(jmpOffset))  			} @@ -2097,7 +2213,7 @@ func (m *machine) ResolveRelocations(refToBinaryOffset []int, binary []byte, rel  // CallTrampolineIslandInfo implements backend.Machine CallTrampolineIslandInfo.  
func (m *machine) CallTrampolineIslandInfo(_ int) (_, _ int, _ error) { return } -func (m *machine) lowerIcmpToFlag(xd, yd *backend.SSAValueDefinition, _64 bool) { +func (m *machine) lowerIcmpToFlag(xd, yd backend.SSAValueDefinition, _64 bool) {  	x := m.getOperand_Reg(xd)  	y := m.getOperand_Mem_Imm32_Reg(yd)  	cmp := m.allocateInstr().asCmpRmiR(true, y, x.reg(), _64) @@ -2140,7 +2256,7 @@ func (m *machine) lowerFcmpToFlags(instr *ssa.Instruction) (f1, f2 cond, and boo  // allocateInstr allocates an instruction.  func (m *machine) allocateInstr() *instruction { -	instr := m.ectx.InstructionPool.Allocate() +	instr := m.instrPool.Allocate()  	if !m.regAllocStarted {  		instr.addedBeforeRegAlloc = true  	} @@ -2154,24 +2270,22 @@ func (m *machine) allocateNop() *instruction {  }  func (m *machine) insert(i *instruction) { -	ectx := m.ectx -	ectx.PendingInstructions = append(ectx.PendingInstructions, i) +	m.pendingInstructions = append(m.pendingInstructions, i)  } -func (m *machine) allocateBrTarget() (nop *instruction, l backend.Label) { //nolint -	pos := m.allocateLabel() -	l = pos.L +func (m *machine) allocateBrTarget() (nop *instruction, l label) { //nolint +	l, pos := m.allocateLabel()  	nop = m.allocateInstr()  	nop.asNop0WithLabel(l) -	pos.Begin, pos.End = nop, nop +	pos.begin, pos.end = nop, nop  	return  } -func (m *machine) allocateLabel() *labelPosition { -	ectx := m.ectx -	l := ectx.AllocateLabel() -	pos := ectx.GetOrAllocateLabelPosition(l) -	return pos +func (m *machine) allocateLabel() (label, *labelPosition) { +	l := m.nextLabel +	pos := m.labelPositionPool.GetOrAllocate(int(l)) +	m.nextLabel++ +	return l, pos  }  func (m *machine) getVRegSpillSlotOffsetFromSP(id regalloc.VRegID, size byte) int64 { @@ -3185,22 +3299,22 @@ func (m *machine) lowerShuffle(x, y ssa.Value, lo, hi uint64, ret ssa.Value) {  		}  	} -	xmaskLabel := m.allocateLabel() -	m.consts = append(m.consts, _const{lo: xMask[0], hi: xMask[1], label: xmaskLabel}) -	ymaskLabel := m.allocateLabel() -	m.consts = append(m.consts, _const{lo: yMask[0], hi: yMask[1], label: ymaskLabel}) +	xl, xmaskPos := m.allocateLabel() +	m.consts = append(m.consts, _const{lo: xMask[0], hi: xMask[1], label: xl, labelPos: xmaskPos}) +	yl, ymaskPos := m.allocateLabel() +	m.consts = append(m.consts, _const{lo: yMask[0], hi: yMask[1], label: yl, labelPos: ymaskPos})  	xx, yy := m.getOperand_Reg(m.c.ValueDefinition(x)), m.getOperand_Reg(m.c.ValueDefinition(y))  	tmpX, tmpY := m.copyToTmp(xx.reg()), m.copyToTmp(yy.reg())  	// Apply mask to X.  	tmp := m.c.AllocateVReg(ssa.TypeV128) -	loadMaskLo := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(xmaskLabel.L)), tmp) +	loadMaskLo := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(xl)), tmp)  	m.insert(loadMaskLo)  	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmp), tmpX))  	// Apply mask to Y. 
-	loadMaskHi := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(ymaskLabel.L)), tmp) +	loadMaskHi := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(yl)), tmp)  	m.insert(loadMaskHi)  	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmp), tmpY)) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go index 8fa974c66..e53729860 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go @@ -12,7 +12,7 @@ func (m *machine) PostRegAlloc() {  }  func (m *machine) setupPrologue() { -	cur := m.ectx.RootInstr +	cur := m.rootInstr  	prevInitInst := cur.next  	// At this point, we have the stack layout as follows: @@ -130,14 +130,13 @@ func (m *machine) setupPrologue() {  // 3. Inserts the dec/inc RSP instruction right before/after the call instruction.  // 4. Lowering that is supposed to be done after regalloc.  func (m *machine) postRegAlloc() { -	ectx := m.ectx -	for cur := ectx.RootInstr; cur != nil; cur = cur.next { +	for cur := m.rootInstr; cur != nil; cur = cur.next {  		switch k := cur.kind; k {  		case ret:  			m.setupEpilogueAfter(cur.prev)  			continue  		case fcvtToSintSequence, fcvtToUintSequence: -			m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] +			m.pendingInstructions = m.pendingInstructions[:0]  			if k == fcvtToSintSequence {  				m.lowerFcvtToSintSequenceAfterRegalloc(cur)  			} else { @@ -146,29 +145,29 @@ func (m *machine) postRegAlloc() {  			prev := cur.prev  			next := cur.next  			cur := prev -			for _, instr := range m.ectx.PendingInstructions { +			for _, instr := range m.pendingInstructions {  				cur = linkInstr(cur, instr)  			}  			linkInstr(cur, next)  			continue  		case xmmCMov: -			m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] +			m.pendingInstructions = m.pendingInstructions[:0]  			m.lowerXmmCmovAfterRegAlloc(cur)  			prev := cur.prev  			next := cur.next  			cur := prev -			for _, instr := range m.ectx.PendingInstructions { +			for _, instr := range m.pendingInstructions {  				cur = linkInstr(cur, instr)  			}  			linkInstr(cur, next)  			continue  		case idivRemSequence: -			m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] +			m.pendingInstructions = m.pendingInstructions[:0]  			m.lowerIDivRemSequenceAfterRegAlloc(cur)  			prev := cur.prev  			next := cur.next  			cur := prev -			for _, instr := range m.ectx.PendingInstructions { +			for _, instr := range m.pendingInstructions {  				cur = linkInstr(cur, instr)  			}  			linkInstr(cur, next) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go index 0bb28ee9e..de9dcc944 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go @@ -1,13 +1,226 @@  package amd64  import ( -	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"  	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"  	
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"  ) -// InsertMoveBefore implements backend.RegAllocFunctionMachine. -func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { +// regAllocFn implements regalloc.Function. +type regAllocFn struct { +	ssaB                   ssa.Builder +	m                      *machine +	loopNestingForestRoots []ssa.BasicBlock +	blockIter              int +} + +// PostOrderBlockIteratorBegin implements regalloc.Function. +func (f *regAllocFn) PostOrderBlockIteratorBegin() *labelPosition { +	f.blockIter = len(f.m.orderedSSABlockLabelPos) - 1 +	return f.PostOrderBlockIteratorNext() +} + +// PostOrderBlockIteratorNext implements regalloc.Function. +func (f *regAllocFn) PostOrderBlockIteratorNext() *labelPosition { +	if f.blockIter < 0 { +		return nil +	} +	b := f.m.orderedSSABlockLabelPos[f.blockIter] +	f.blockIter-- +	return b +} + +// ReversePostOrderBlockIteratorBegin implements regalloc.Function. +func (f *regAllocFn) ReversePostOrderBlockIteratorBegin() *labelPosition { +	f.blockIter = 0 +	return f.ReversePostOrderBlockIteratorNext() +} + +// ReversePostOrderBlockIteratorNext implements regalloc.Function. +func (f *regAllocFn) ReversePostOrderBlockIteratorNext() *labelPosition { +	if f.blockIter >= len(f.m.orderedSSABlockLabelPos) { +		return nil +	} +	b := f.m.orderedSSABlockLabelPos[f.blockIter] +	f.blockIter++ +	return b +} + +// ClobberedRegisters implements regalloc.Function. +func (f *regAllocFn) ClobberedRegisters(regs []regalloc.VReg) { +	f.m.clobberedRegs = append(f.m.clobberedRegs[:0], regs...) +} + +// LoopNestingForestRoots implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestRoots() int { +	f.loopNestingForestRoots = f.ssaB.LoopNestingForestRoots() +	return len(f.loopNestingForestRoots) +} + +// LoopNestingForestRoot implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestRoot(i int) *labelPosition { +	root := f.loopNestingForestRoots[i] +	pos := f.m.getOrAllocateSSABlockLabelPosition(root) +	return pos +} + +// LowestCommonAncestor implements regalloc.Function. +func (f *regAllocFn) LowestCommonAncestor(blk1, blk2 *labelPosition) *labelPosition { +	sb := f.ssaB.LowestCommonAncestor(blk1.sb, blk2.sb) +	pos := f.m.getOrAllocateSSABlockLabelPosition(sb) +	return pos +} + +// Idom implements regalloc.Function. +func (f *regAllocFn) Idom(blk *labelPosition) *labelPosition { +	sb := f.ssaB.Idom(blk.sb) +	pos := f.m.getOrAllocateSSABlockLabelPosition(sb) +	return pos +} + +// SwapBefore implements regalloc.Function. +func (f *regAllocFn) SwapBefore(x1, x2, tmp regalloc.VReg, instr *instruction) { +	f.m.swap(instr.prev, x1, x2, tmp) +} + +// StoreRegisterBefore implements regalloc.Function. +func (f *regAllocFn) StoreRegisterBefore(v regalloc.VReg, instr *instruction) { +	m := f.m +	m.insertStoreRegisterAt(v, instr, false) +} + +// StoreRegisterAfter implements regalloc.Function. +func (f *regAllocFn) StoreRegisterAfter(v regalloc.VReg, instr *instruction) { +	m := f.m +	m.insertStoreRegisterAt(v, instr, true) +} + +// ReloadRegisterBefore implements regalloc.Function. +func (f *regAllocFn) ReloadRegisterBefore(v regalloc.VReg, instr *instruction) { +	m := f.m +	m.insertReloadRegisterAt(v, instr, false) +} + +// ReloadRegisterAfter implements regalloc.Function. +func (f *regAllocFn) ReloadRegisterAfter(v regalloc.VReg, instr *instruction) { +	m := f.m +	m.insertReloadRegisterAt(v, instr, true) +} + +// InsertMoveBefore implements regalloc.Function. 
+func (f *regAllocFn) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { +	f.m.insertMoveBefore(dst, src, instr) +} + +// LoopNestingForestChild implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestChild(pos *labelPosition, i int) *labelPosition { +	childSB := pos.sb.LoopNestingForestChildren()[i] +	return f.m.getOrAllocateSSABlockLabelPosition(childSB) +} + +// Succ implements regalloc.Block. +func (f *regAllocFn) Succ(pos *labelPosition, i int) *labelPosition { +	succSB := pos.sb.Succ(i) +	if succSB.ReturnBlock() { +		return nil +	} +	return f.m.getOrAllocateSSABlockLabelPosition(succSB) +} + +// Pred implements regalloc.Block. +func (f *regAllocFn) Pred(pos *labelPosition, i int) *labelPosition { +	predSB := pos.sb.Pred(i) +	return f.m.getOrAllocateSSABlockLabelPosition(predSB) +} + +// BlockParams implements regalloc.Function. +func (f *regAllocFn) BlockParams(pos *labelPosition, regs *[]regalloc.VReg) []regalloc.VReg { +	c := f.m.c +	*regs = (*regs)[:0] +	for i := 0; i < pos.sb.Params(); i++ { +		v := c.VRegOf(pos.sb.Param(i)) +		*regs = append(*regs, v) +	} +	return *regs +} + +// ID implements regalloc.Block. +func (pos *labelPosition) ID() int32 { +	return int32(pos.sb.ID()) +} + +// InstrIteratorBegin implements regalloc.Block. +func (pos *labelPosition) InstrIteratorBegin() *instruction { +	ret := pos.begin +	pos.cur = ret +	return ret +} + +// InstrIteratorNext implements regalloc.Block. +func (pos *labelPosition) InstrIteratorNext() *instruction { +	for { +		if pos.cur == pos.end { +			return nil +		} +		instr := pos.cur.next +		pos.cur = instr +		if instr == nil { +			return nil +		} else if instr.addedBeforeRegAlloc { +			// Only concerned about the instruction added before regalloc. +			return instr +		} +	} +} + +// InstrRevIteratorBegin implements regalloc.Block. +func (pos *labelPosition) InstrRevIteratorBegin() *instruction { +	pos.cur = pos.end +	return pos.cur +} + +// InstrRevIteratorNext implements regalloc.Block. +func (pos *labelPosition) InstrRevIteratorNext() *instruction { +	for { +		if pos.cur == pos.begin { +			return nil +		} +		instr := pos.cur.prev +		pos.cur = instr +		if instr == nil { +			return nil +		} else if instr.addedBeforeRegAlloc { +			// Only concerned about the instruction added before regalloc. +			return instr +		} +	} +} + +// FirstInstr implements regalloc.Block. +func (pos *labelPosition) FirstInstr() *instruction { return pos.begin } + +// LastInstrForInsertion implements regalloc.Block. +func (pos *labelPosition) LastInstrForInsertion() *instruction { +	return lastInstrForInsertion(pos.begin, pos.end) +} + +// Preds implements regalloc.Block. +func (pos *labelPosition) Preds() int { return pos.sb.Preds() } + +// Entry implements regalloc.Block. +func (pos *labelPosition) Entry() bool { return pos.sb.EntryBlock() } + +// Succs implements regalloc.Block. +func (pos *labelPosition) Succs() int { return pos.sb.Succs() } + +// LoopHeader implements regalloc.Block. +func (pos *labelPosition) LoopHeader() bool { return pos.sb.LoopHeader() } + +// LoopNestingForestChildren implements regalloc.Block. 
+func (pos *labelPosition) LoopNestingForestChildren() int { +	return len(pos.sb.LoopNestingForestChildren()) +} + +func (m *machine) insertMoveBefore(dst, src regalloc.VReg, instr *instruction) {  	typ := src.RegType()  	if typ != dst.RegType() {  		panic("BUG: src and dst must have the same type") @@ -26,8 +239,7 @@ func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) {  	linkInstr(cur, prevNext)  } -// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine. -func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { +func (m *machine) insertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {  	if !v.IsRealReg() {  		panic("BUG: VReg must be backed by real reg to be stored")  	} @@ -61,8 +273,7 @@ func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, aft  	return linkInstr(cur, prevNext)  } -// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine. -func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { +func (m *machine) insertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {  	if !v.IsRealReg() {  		panic("BUG: VReg must be backed by real reg to be stored")  	} @@ -98,13 +309,7 @@ func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, af  	return linkInstr(cur, prevNext)  } -// ClobberedRegisters implements backend.RegAllocFunctionMachine. -func (m *machine) ClobberedRegisters(regs []regalloc.VReg) { -	m.clobberedRegs = append(m.clobberedRegs[:0], regs...) -} - -// Swap implements backend.RegAllocFunctionMachine. -func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) { +func (m *machine) swap(cur *instruction, x1, x2, tmp regalloc.VReg) {  	if x1.RegType() == regalloc.RegTypeInt {  		prevNext := cur.next  		xc := m.allocateInstr().asXCHG(x1, newOperandReg(x2), 8) @@ -113,25 +318,24 @@ func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) {  	} else {  		if tmp.Valid() {  			prevNext := cur.next -			m.InsertMoveBefore(tmp, x1, prevNext) -			m.InsertMoveBefore(x1, x2, prevNext) -			m.InsertMoveBefore(x2, tmp, prevNext) +			m.insertMoveBefore(tmp, x1, prevNext) +			m.insertMoveBefore(x1, x2, prevNext) +			m.insertMoveBefore(x2, tmp, prevNext)  		} else {  			prevNext := cur.next  			r2 := x2.RealReg()  			// Temporarily spill x1 to stack. -			cur = m.InsertStoreRegisterAt(x1, cur, true).prev +			cur = m.insertStoreRegisterAt(x1, cur, true).prev  			// Then move x2 to x1.  			cur = linkInstr(cur, m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqa, newOperandReg(x2), x1))  			linkInstr(cur, prevNext)  			// Then reload the original value on x1 from stack to r2. -			m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true) +			m.insertReloadRegisterAt(x1.SetRealReg(r2), cur, true)  		}  	}  } -// LastInstrForInsertion implements backend.RegAllocFunctionMachine. -func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction { +func lastInstrForInsertion(begin, end *instruction) *instruction {  	cur := end  	for cur.kind == nop0 {  		cur = cur.prev @@ -146,8 +350,3 @@ func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction {  		return end  	}  } - -// SSABlockLabel implements backend.RegAllocFunctionMachine. 
-func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label { -	return m.ectx.SsaBlockIDToLabels[id] -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go index 539a8b754..8d514d857 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go @@ -127,7 +127,7 @@ func (m *machine) lowerSqmulRoundSat(x, y, ret ssa.Value) {  	tmpX := m.copyToTmp(xx.reg())  	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmulhrsw, yy, tmpX)) -	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmpX), tmp)) +	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqw, newOperandReg(tmpX), tmp))  	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmpX))  	m.copyTo(tmpX, m.c.VRegOf(ret)) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go index c6fcb8673..787975683 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go @@ -59,7 +59,7 @@ func (o *operand) format(_64 bool) string {  	case operandKindImm32:  		return fmt.Sprintf("$%d", int32(o.imm32()))  	case operandKindLabel: -		return backend.Label(o.imm32()).String() +		return label(o.imm32()).String()  	default:  		panic(fmt.Sprintf("BUG: invalid operand: %s", o.kind))  	} @@ -85,22 +85,22 @@ func (o *operand) imm32() uint32 {  	return uint32(o.data)  } -func (o *operand) label() backend.Label { +func (o *operand) label() label {  	switch o.kind {  	case operandKindLabel: -		return backend.Label(o.data) +		return label(o.data)  	case operandKindMem:  		mem := o.addressMode()  		if mem.kind() != amodeRipRel {  			panic("BUG: invalid label")  		} -		return backend.Label(mem.imm32) +		return label(mem.imm32)  	default:  		panic("BUG: invalid operand kind")  	}  } -func newOperandLabel(label backend.Label) operand { +func newOperandLabel(label label) operand {  	return operand{kind: operandKindLabel, data: uint64(label)}  } @@ -221,7 +221,7 @@ func (m *machine) newAmodeRegRegShift(imm32 uint32, base, index regalloc.VReg, s  	return ret  } -func (m *machine) newAmodeRipRel(label backend.Label) *amode { +func (m *machine) newAmodeRipRel(label label) *amode {  	ret := m.amodePool.Allocate()  	*ret = amode{kindWithShift: uint32(amodeRipRel), imm32: uint32(label)}  	return ret @@ -246,18 +246,18 @@ func (a *amode) String() string {  			"%d(%s,%s,%d)",  			int32(a.imm32), formatVRegSized(a.base, true), formatVRegSized(a.index, true), shift)  	case amodeRipRel: -		return fmt.Sprintf("%s(%%rip)", backend.Label(a.imm32)) +		return fmt.Sprintf("%s(%%rip)", label(a.imm32))  	default:  		panic("BUG: invalid amode kind")  	}  } -func (m *machine) getOperand_Mem_Reg(def *backend.SSAValueDefinition) (op operand) { -	if def.IsFromBlockParam() { -		return newOperandReg(def.BlkParamVReg) +func (m *machine) getOperand_Mem_Reg(def backend.SSAValueDefinition) (op operand) { +	if !def.IsFromInstr() { +		return newOperandReg(m.c.VRegOf(def.V))  	} -	if def.SSAValue().Type() == ssa.TypeV128 { +	if def.V.Type() == ssa.TypeV128 {  		// SIMD instructions require strict memory 
alignment, so we don't support the memory operand for V128 at the moment.  		return m.getOperand_Reg(def)  	} @@ -272,9 +272,9 @@ func (m *machine) getOperand_Mem_Reg(def *backend.SSAValueDefinition) (op operan  	return m.getOperand_Reg(def)  } -func (m *machine) getOperand_Mem_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) { -	if def.IsFromBlockParam() { -		return newOperandReg(def.BlkParamVReg) +func (m *machine) getOperand_Mem_Imm32_Reg(def backend.SSAValueDefinition) (op operand) { +	if !def.IsFromInstr() { +		return newOperandReg(m.c.VRegOf(def.V))  	}  	if m.c.MatchInstr(def, ssa.OpcodeLoad) { @@ -287,9 +287,9 @@ func (m *machine) getOperand_Mem_Imm32_Reg(def *backend.SSAValueDefinition) (op  	return m.getOperand_Imm32_Reg(def)  } -func (m *machine) getOperand_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) { -	if def.IsFromBlockParam() { -		return newOperandReg(def.BlkParamVReg) +func (m *machine) getOperand_Imm32_Reg(def backend.SSAValueDefinition) (op operand) { +	if !def.IsFromInstr() { +		return newOperandReg(m.c.VRegOf(def.V))  	}  	instr := def.Instr @@ -323,24 +323,14 @@ func asImm32(val uint64, allowSignExt bool) (uint32, bool) {  	return u32val, true  } -func (m *machine) getOperand_Reg(def *backend.SSAValueDefinition) (op operand) { +func (m *machine) getOperand_Reg(def backend.SSAValueDefinition) (op operand) {  	var v regalloc.VReg -	if def.IsFromBlockParam() { -		v = def.BlkParamVReg +	if instr := def.Instr; instr != nil && instr.Constant() { +		// We inline all the constant instructions so that we could reduce the register usage. +		v = m.lowerConstant(instr) +		instr.MarkLowered()  	} else { -		instr := def.Instr -		if instr.Constant() { -			// We inline all the constant instructions so that we could reduce the register usage. -			v = m.lowerConstant(instr) -			instr.MarkLowered() -		} else { -			if n := def.N; n == 0 { -				v = m.c.VRegOf(instr.Return()) -			} else { -				_, rs := instr.Returns() -				v = m.c.VRegOf(rs[n-1]) -			} -		} +		v = m.c.VRegOf(def.V)  	}  	return newOperandReg(v)  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go deleted file mode 100644 index 5219837e3..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build !tinygo - -package amd64 - -import "reflect" - -// setSliceLimits sets both Cap and Len for the given reflected slice. -func setSliceLimits(s *reflect.SliceHeader, limit uintptr) { -	s.Len = int(limit) -	s.Cap = int(limit) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go deleted file mode 100644 index df4cf46ec..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build tinygo - -package amd64 - -import "reflect" - -// setSliceLimits sets both Cap and Len for the given reflected slice. 
-func setSliceLimits(s *reflect.SliceHeader, limit uintptr) { -	s.Len = limit -	s.Len = limit -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go index 05ba5f027..ef823bdbd 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go @@ -9,12 +9,14 @@ import (  )  func stackView(rbp, top uintptr) []byte { +	l := int(top - rbp)  	var stackBuf []byte  	{ -		// TODO: use unsafe.Slice after floor version is set to Go 1.20. +		//nolint:staticcheck  		hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf))  		hdr.Data = rbp -		setSliceLimits(hdr, top-rbp) +		hdr.Len = l +		hdr.Cap = l  	}  	return stackBuf  } @@ -72,9 +74,9 @@ func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {  	//              |   SizeInBytes   |  	//              +-----------------+ <---- stackPointerBeforeGoCall  	//                 (low address) -	data := unsafe.Pointer(uintptr(unsafe.Pointer(stackPointerBeforeGoCall)) + 8) +	data := unsafe.Add(unsafe.Pointer(stackPointerBeforeGoCall), 8)  	size := *stackPointerBeforeGoCall / 8 -	return unsafe.Slice((*uint64)(data), int(size)) +	return unsafe.Slice((*uint64)(data), size)  }  func AdjustClonedStack(oldRsp, oldTop, rsp, rbp, top uintptr) { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go index 4eaa13ce1..d1eaa7cd4 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go @@ -182,9 +182,9 @@ func (m *machine) LowerReturns(rets []ssa.Value) {  // callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the  // caller side of the function call. -func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, slotBegin int64) { +func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def backend.SSAValueDefinition, slotBegin int64) {  	arg := &a.Args[argIndex] -	if def != nil && def.IsFromInstr() { +	if def.IsFromInstr() {  		// Constant instructions are inlined.  		
if inst := def.Instr; inst.Constant() {  			val := inst.Return() @@ -228,10 +228,9 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i  }  func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, *addressMode) { -	exct := m.executableContext -	exct.PendingInstructions = exct.PendingInstructions[:0] +	m.pendingInstructions = m.pendingInstructions[:0]  	mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse) -	for _, instr := range exct.PendingInstructions { +	for _, instr := range m.pendingInstructions {  		cur = linkInstr(cur, instr)  	}  	return cur, mode diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go index 99e6bb482..06f8a4a05 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go @@ -14,7 +14,6 @@ var calleeSavedRegistersSorted = []regalloc.VReg{  // CompileGoFunctionTrampoline implements backend.Machine.  func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte { -	exct := m.executableContext  	argBegin := 1 // Skips exec context by default.  	if needModuleContextPtr {  		argBegin++ @@ -26,7 +25,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *  	cur := m.allocateInstr()  	cur.asNop0() -	exct.RootInstr = cur +	m.rootInstr = cur  	// Execution context is always the first argument.  	execCtrPtr := x0VReg @@ -244,7 +243,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *  	ret.asRet()  	linkInstr(cur, ret) -	m.encode(m.executableContext.RootInstr) +	m.encode(m.rootInstr)  	return m.compiler.Buf()  } @@ -302,20 +301,18 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re  }  func (m *machine) lowerConstantI64AndInsert(cur *instruction, dst regalloc.VReg, v int64) *instruction { -	exct := m.executableContext -	exct.PendingInstructions = exct.PendingInstructions[:0] +	m.pendingInstructions = m.pendingInstructions[:0]  	m.lowerConstantI64(dst, v) -	for _, instr := range exct.PendingInstructions { +	for _, instr := range m.pendingInstructions {  		cur = linkInstr(cur, instr)  	}  	return cur  }  func (m *machine) lowerConstantI32AndInsert(cur *instruction, dst regalloc.VReg, v int32) *instruction { -	exct := m.executableContext -	exct.PendingInstructions = exct.PendingInstructions[:0] +	m.pendingInstructions = m.pendingInstructions[:0]  	m.lowerConstantI32(dst, v) -	for _, instr := range exct.PendingInstructions { +	for _, instr := range m.pendingInstructions {  		cur = linkInstr(cur, instr)  	}  	return cur diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go index 7121cb538..1f563428a 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -36,18 +36,6 @@ type (  	instructionKind byte  ) -func asNop0(i *instruction) { -	i.kind = nop0 -} - -func setNext(i, next *instruction) { -	i.next = next -} - -func 
setPrev(i, prev *instruction) { -	i.prev = prev -} -  // IsCall implements regalloc.Instr IsCall.  func (i *instruction) IsCall() bool {  	return i.kind == call @@ -63,21 +51,6 @@ func (i *instruction) IsReturn() bool {  	return i.kind == ret  } -// Next implements regalloc.Instr Next. -func (i *instruction) Next() regalloc.Instr { -	return i.next -} - -// Prev implements regalloc.Instr Prev. -func (i *instruction) Prev() regalloc.Instr { -	return i.prev -} - -// AddedBeforeRegAlloc implements regalloc.Instr AddedBeforeRegAlloc. -func (i *instruction) AddedBeforeRegAlloc() bool { -	return i.addedBeforeRegAlloc -} -  type defKind byte  const ( diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go index f0ede2d6a..21be9b71e 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go @@ -12,7 +12,7 @@ import (  // Encode implements backend.Machine Encode.  func (m *machine) Encode(ctx context.Context) error {  	m.resolveRelativeAddresses(ctx) -	m.encode(m.executableContext.RootInstr) +	m.encode(m.rootInstr)  	if l := len(m.compiler.Buf()); l > maxFunctionExecutableSize {  		return fmt.Errorf("function size exceeds the limit: %d > %d", l, maxFunctionExecutableSize)  	} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index 048bf3204..f9df356c0 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -17,19 +17,18 @@ import (  // LowerSingleBranch implements backend.Machine.  func (m *machine) LowerSingleBranch(br *ssa.Instruction) { -	ectx := m.executableContext  	switch br.Opcode() {  	case ssa.OpcodeJump: -		_, _, targetBlk := br.BranchData() +		_, _, targetBlkID := br.BranchData()  		if br.IsFallthroughJump() {  			return  		}  		b := m.allocateInstr() -		target := ectx.GetOrAllocateSSABlockLabel(targetBlk) -		if target == labelReturn { +		targetBlk := m.compiler.SSABuilder().BasicBlock(targetBlkID) +		if targetBlk.ReturnBlock() {  			b.asRet()  		} else { -			b.asBr(target) +			b.asBr(ssaBlockLabel(targetBlk))  		}  		m.insert(b)  	case ssa.OpcodeBrTable: @@ -40,7 +39,8 @@ func (m *machine) LowerSingleBranch(br *ssa.Instruction) {  }  func (m *machine) lowerBrTable(i *ssa.Instruction) { -	index, targets := i.BrTableData() +	index, targetBlockIDs := i.BrTableData() +	targetBlockCount := len(targetBlockIDs.View())  	indexOperand := m.getOperand_NR(m.compiler.ValueDefinition(index), extModeNone)  	// Firstly, we have to do the bounds check of the index, and @@ -50,7 +50,7 @@ func (m *machine) lowerBrTable(i *ssa.Instruction) {  	// subs wzr, index, maxIndexReg  	// csel adjustedIndex, maxIndexReg, index, hs ;; if index is higher or equal than maxIndexReg.  	
maxIndexReg := m.compiler.AllocateVReg(ssa.TypeI32) -	m.lowerConstantI32(maxIndexReg, int32(len(targets)-1)) +	m.lowerConstantI32(maxIndexReg, int32(targetBlockCount-1))  	subs := m.allocateInstr()  	subs.asALU(aluOpSubS, xzrVReg, indexOperand, operandNR(maxIndexReg), false)  	m.insert(subs) @@ -61,24 +61,24 @@ func (m *machine) lowerBrTable(i *ssa.Instruction) {  	brSequence := m.allocateInstr() -	tableIndex := m.addJmpTableTarget(targets) -	brSequence.asBrTableSequence(adjustedIndex, tableIndex, len(targets)) +	tableIndex := m.addJmpTableTarget(targetBlockIDs) +	brSequence.asBrTableSequence(adjustedIndex, tableIndex, targetBlockCount)  	m.insert(brSequence)  }  // LowerConditionalBranch implements backend.Machine.  func (m *machine) LowerConditionalBranch(b *ssa.Instruction) { -	exctx := m.executableContext -	cval, args, targetBlk := b.BranchData() +	cval, args, targetBlkID := b.BranchData()  	if len(args) > 0 {  		panic(fmt.Sprintf(  			"conditional branch shouldn't have args; likely a bug in critical edge splitting: from %s to %s", -			exctx.CurrentSSABlk, -			targetBlk, +			m.currentLabelPos.sb, +			targetBlkID,  		))  	} -	target := exctx.GetOrAllocateSSABlockLabel(targetBlk) +	targetBlk := m.compiler.SSABuilder().BasicBlock(targetBlkID) +	target := ssaBlockLabel(targetBlk)  	cvalDef := m.compiler.ValueDefinition(cval)  	switch { @@ -791,7 +791,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  	default:  		panic("TODO: lowering " + op.String())  	} -	m.executableContext.FlushPendingInstructions() +	m.FlushPendingInstructions()  }  func (m *machine) lowerShuffle(rd regalloc.VReg, rn, rm operand, lane1, lane2 uint64) { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go index d9fbf1789..7a398c3d0 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go @@ -162,9 +162,9 @@ func (o operand) assignReg(v regalloc.VReg) operand {  //  // `mode` is used to extend the operand if the bit length is smaller than mode.bits().  // If the operand can be expressed as operandKindImm12, `mode` is ignored. -func (m *machine) getOperand_Imm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { -	if def.IsFromBlockParam() { -		return operandNR(def.BlkParamVReg) +func (m *machine) getOperand_Imm12_ER_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { +	if !def.IsFromInstr() { +		return operandNR(m.compiler.VRegOf(def.V))  	}  	instr := def.Instr @@ -179,9 +179,9 @@ func (m *machine) getOperand_Imm12_ER_SR_NR(def *backend.SSAValueDefinition, mod  // getOperand_MaybeNegatedImm12_ER_SR_NR is almost the same as getOperand_Imm12_ER_SR_NR, but this might negate the immediate value.  // If the immediate value is negated, the second return value is true, otherwise always false. 
-func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand, negatedImm12 bool) { -	if def.IsFromBlockParam() { -		return operandNR(def.BlkParamVReg), false +func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand, negatedImm12 bool) { +	if !def.IsFromInstr() { +		return operandNR(m.compiler.VRegOf(def.V)), false  	}  	instr := def.Instr @@ -193,7 +193,7 @@ func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDef  		}  		signExtended := int64(c) -		if def.SSAValue().Type().Bits() == 32 { +		if def.V.Type().Bits() == 32 {  			signExtended = (signExtended << 32) >> 32  		}  		negatedWithoutSign := -signExtended @@ -208,9 +208,9 @@ func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDef  // ensureValueNR returns an operand of either operandKindER, operandKindSR, or operandKindNR from the given value (defined by `def).  //  // `mode` is used to extend the operand if the bit length is smaller than mode.bits(). -func (m *machine) getOperand_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { -	if def.IsFromBlockParam() { -		return operandNR(def.BlkParamVReg) +func (m *machine) getOperand_ER_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { +	if !def.IsFromInstr() { +		return operandNR(m.compiler.VRegOf(def.V))  	}  	if m.compiler.MatchInstr(def, ssa.OpcodeSExtend) || m.compiler.MatchInstr(def, ssa.OpcodeUExtend) { @@ -251,9 +251,9 @@ func (m *machine) getOperand_ER_SR_NR(def *backend.SSAValueDefinition, mode extM  // ensureValueNR returns an operand of either operandKindSR or operandKindNR from the given value (defined by `def).  //  // `mode` is used to extend the operand if the bit length is smaller than mode.bits(). -func (m *machine) getOperand_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { -	if def.IsFromBlockParam() { -		return operandNR(def.BlkParamVReg) +func (m *machine) getOperand_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { +	if !def.IsFromInstr() { +		return operandNR(m.compiler.VRegOf(def.V))  	}  	if m.compiler.MatchInstr(def, ssa.OpcodeIshl) { @@ -273,9 +273,9 @@ func (m *machine) getOperand_SR_NR(def *backend.SSAValueDefinition, mode extMode  }  // getOperand_ShiftImm_NR returns an operand of either operandKindShiftImm or operandKindNR from the given value (defined by `def). -func (m *machine) getOperand_ShiftImm_NR(def *backend.SSAValueDefinition, mode extMode, shiftBitWidth byte) (op operand) { -	if def.IsFromBlockParam() { -		return operandNR(def.BlkParamVReg) +func (m *machine) getOperand_ShiftImm_NR(def backend.SSAValueDefinition, mode extMode, shiftBitWidth byte) (op operand) { +	if !def.IsFromInstr() { +		return operandNR(m.compiler.VRegOf(def.V))  	}  	instr := def.Instr @@ -289,28 +289,18 @@ func (m *machine) getOperand_ShiftImm_NR(def *backend.SSAValueDefinition, mode e  // ensureValueNR returns an operand of operandKindNR from the given value (defined by `def).  //  // `mode` is used to extend the operand if the bit length is smaller than mode.bits(). 
-func (m *machine) getOperand_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { +func (m *machine) getOperand_NR(def backend.SSAValueDefinition, mode extMode) (op operand) {  	var v regalloc.VReg -	if def.IsFromBlockParam() { -		v = def.BlkParamVReg +	if def.IsFromInstr() && def.Instr.Constant() { +		// We inline all the constant instructions so that we could reduce the register usage. +		v = m.lowerConstant(def.Instr) +		def.Instr.MarkLowered()  	} else { -		instr := def.Instr -		if instr.Constant() { -			// We inline all the constant instructions so that we could reduce the register usage. -			v = m.lowerConstant(instr) -			instr.MarkLowered() -		} else { -			if n := def.N; n == 0 { -				v = m.compiler.VRegOf(instr.Return()) -			} else { -				_, rs := instr.Returns() -				v = m.compiler.VRegOf(rs[n-1]) -			} -		} +		v = m.compiler.VRegOf(def.V)  	}  	r := v -	switch inBits := def.SSAValue().Type().Bits(); { +	switch inBits := def.V.Type().Bits(); {  	case mode == extModeNone:  	case inBits == 32 && (mode == extModeZeroExtend32 || mode == extModeSignExtend32):  	case inBits == 32 && mode == extModeZeroExtend64: diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go index 5f584f928..00e6b238f 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go @@ -3,6 +3,7 @@ package arm64  import (  	"context"  	"fmt" +	"math"  	"strings"  	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend" @@ -14,12 +15,33 @@ import (  type (  	// machine implements backend.Machine.  	machine struct { -		compiler          backend.Compiler -		executableContext *backend.ExecutableContextT[instruction] -		currentABI        *backend.FunctionABI - -		regAlloc   regalloc.Allocator -		regAllocFn *backend.RegAllocFunction[*instruction, *machine] +		compiler   backend.Compiler +		currentABI *backend.FunctionABI +		instrPool  wazevoapi.Pool[instruction] +		// labelPositionPool is the pool of labelPosition. The id is the label where +		// if the label is less than the maxSSABlockID, it's the ssa.BasicBlockID. +		labelPositionPool wazevoapi.IDedPool[labelPosition] + +		// nextLabel is the next label to be allocated. The first free label comes after maxSSABlockID +		// so that we can have an identical label for the SSA block ID, which is useful for debugging. +		nextLabel label +		// rootInstr is the first instruction of the function. +		rootInstr *instruction +		// currentLabelPos is the currently-compiled ssa.BasicBlock's labelPosition. +		currentLabelPos *labelPosition +		// orderedSSABlockLabelPos is the ordered list of labelPosition in the generated code for each ssa.BasicBlock. +		orderedSSABlockLabelPos []*labelPosition +		// returnLabelPos is the labelPosition for the return block. +		returnLabelPos labelPosition +		// perBlockHead and perBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock. +		perBlockHead, perBlockEnd *instruction +		// pendingInstructions are the instructions which are not yet emitted into the instruction list. +		pendingInstructions []*instruction +		// maxSSABlockID is the maximum ssa.BasicBlockID in the current function. 
+		maxSSABlockID label + +		regAlloc   regalloc.Allocator[*instruction, *labelPosition, *regAllocFn] +		regAllocFn regAllocFn  		amodePool wazevoapi.Pool[addressMode] @@ -35,6 +57,8 @@ type (  		// jmpTableTargets holds the labels of the jump table targets.  		jmpTableTargets [][]uint32 +		// jmpTableTargetNext is the index to the jmpTableTargets slice to be used for the next jump table. +		jmpTableTargetsNext int  		// spillSlotSize is the size of the stack slot in bytes used for spilling registers.  		// During the execution of the function, the stack looks like: @@ -91,45 +115,132 @@ type (  		nextLabel label  		offset    int64  	} +) -	labelPosition = backend.LabelPosition[instruction] -	label         = backend.Label +type ( +	// label represents a position in the generated code which is either +	// a real instruction or the constant InstructionPool (e.g. jump tables). +	// +	// This is exactly the same as the traditional "label" in assembly code. +	label uint32 + +	// labelPosition represents the regions of the generated code which the label represents. +	// This implements regalloc.Block. +	labelPosition struct { +		// sb is not nil if this corresponds to a ssa.BasicBlock. +		sb ssa.BasicBlock +		// cur is used to walk through the instructions in the block during the register allocation. +		cur, +		// begin and end are the first and last instructions of the block. +		begin, end *instruction +		// binaryOffset is the offset in the binary where the label is located. +		binaryOffset int64 +	}  )  const ( -	labelReturn  = backend.LabelReturn -	labelInvalid = backend.LabelInvalid +	labelReturn  label = math.MaxUint32 +	labelInvalid       = labelReturn - 1  ) +// String implements backend.Machine. +func (l label) String() string { +	return fmt.Sprintf("L%d", l) +} + +func resetLabelPosition(l *labelPosition) { +	*l = labelPosition{} +} +  // NewBackend returns a new backend for arm64.  func NewBackend() backend.Machine {  	m := &machine{  		spillSlots:        make(map[regalloc.VRegID]int64), -		executableContext: newExecutableContext(), -		regAlloc:          regalloc.NewAllocator(regInfo), +		regAlloc:          regalloc.NewAllocator[*instruction, *labelPosition, *regAllocFn](regInfo),  		amodePool:         wazevoapi.NewPool[addressMode](resetAddressMode), +		instrPool:         wazevoapi.NewPool[instruction](resetInstruction), +		labelPositionPool: wazevoapi.NewIDedPool[labelPosition](resetLabelPosition),  	} +	m.regAllocFn.m = m  	return m  } -func newExecutableContext() *backend.ExecutableContextT[instruction] { -	return backend.NewExecutableContextT[instruction](resetInstruction, setNext, setPrev, asNop0) +func ssaBlockLabel(sb ssa.BasicBlock) label { +	if sb.ReturnBlock() { +		return labelReturn +	} +	return label(sb.ID())  } -// ExecutableContext implements backend.Machine. -func (m *machine) ExecutableContext() backend.ExecutableContext { -	return m.executableContext +// getOrAllocateSSABlockLabelPosition returns the labelPosition for the given basic block. +func (m *machine) getOrAllocateSSABlockLabelPosition(sb ssa.BasicBlock) *labelPosition { +	if sb.ReturnBlock() { +		m.returnLabelPos.sb = sb +		return &m.returnLabelPos +	} + +	l := ssaBlockLabel(sb) +	pos := m.labelPositionPool.GetOrAllocate(int(l)) +	pos.sb = sb +	return pos  } -// RegAlloc implements backend.Machine Function. 
-func (m *machine) RegAlloc() { -	rf := m.regAllocFn -	for _, pos := range m.executableContext.OrderedBlockLabels { -		rf.AddBlock(pos.SB, pos.L, pos.Begin, pos.End) +// LinkAdjacentBlocks implements backend.Machine. +func (m *machine) LinkAdjacentBlocks(prev, next ssa.BasicBlock) { +	prevPos, nextPos := m.getOrAllocateSSABlockLabelPosition(prev), m.getOrAllocateSSABlockLabelPosition(next) +	prevPos.end.next = nextPos.begin +} + +// StartBlock implements backend.Machine. +func (m *machine) StartBlock(blk ssa.BasicBlock) { +	m.currentLabelPos = m.getOrAllocateSSABlockLabelPosition(blk) +	labelPos := m.currentLabelPos +	end := m.allocateNop() +	m.perBlockHead, m.perBlockEnd = end, end +	labelPos.begin, labelPos.end = end, end +	m.orderedSSABlockLabelPos = append(m.orderedSSABlockLabelPos, labelPos) +} + +// EndBlock implements ExecutableContext. +func (m *machine) EndBlock() { +	// Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions. +	m.insertAtPerBlockHead(m.allocateNop()) + +	m.currentLabelPos.begin = m.perBlockHead + +	if m.currentLabelPos.sb.EntryBlock() { +		m.rootInstr = m.perBlockHead +	} +} + +func (m *machine) insertAtPerBlockHead(i *instruction) { +	if m.perBlockHead == nil { +		m.perBlockHead = i +		m.perBlockEnd = i +		return  	} +	i.next = m.perBlockHead +	m.perBlockHead.prev = i +	m.perBlockHead = i +} + +// FlushPendingInstructions implements backend.Machine. +func (m *machine) FlushPendingInstructions() { +	l := len(m.pendingInstructions) +	if l == 0 { +		return +	} +	for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order. +		m.insertAtPerBlockHead(m.pendingInstructions[i]) +	} +	m.pendingInstructions = m.pendingInstructions[:0] +} + +// RegAlloc implements backend.Machine Function. +func (m *machine) RegAlloc() {  	m.regAllocStarted = true -	m.regAlloc.DoAllocation(rf) +	m.regAlloc.DoAllocation(&m.regAllocFn)  	// Now that we know the final spill slot size, we must align spillSlotSize to 16 bytes.  	m.spillSlotSize = (m.spillSlotSize + 15) &^ 15  } @@ -146,13 +257,22 @@ func (m *machine) Reset() {  	m.clobberedRegs = m.clobberedRegs[:0]  	m.regAllocStarted = false  	m.regAlloc.Reset() -	m.regAllocFn.Reset()  	m.spillSlotSize = 0  	m.unresolvedAddressModes = m.unresolvedAddressModes[:0]  	m.maxRequiredStackSizeForCalls = 0 -	m.executableContext.Reset() -	m.jmpTableTargets = m.jmpTableTargets[:0] +	m.jmpTableTargetsNext = 0  	m.amodePool.Reset() +	m.instrPool.Reset() +	m.labelPositionPool.Reset() +	m.pendingInstructions = m.pendingInstructions[:0] +	m.perBlockHead, m.perBlockEnd, m.rootInstr = nil, nil, nil +	m.orderedSSABlockLabelPos = m.orderedSSABlockLabelPos[:0] +} + +// StartLoweringFunction implements backend.Machine StartLoweringFunction. +func (m *machine) StartLoweringFunction(maxBlockID ssa.BasicBlockID) { +	m.maxSSABlockID = label(maxBlockID) +	m.nextLabel = label(maxBlockID) + 1  }  // SetCurrentABI implements backend.Machine SetCurrentABI. @@ -168,12 +288,11 @@ func (m *machine) DisableStackCheck() {  // SetCompiler implements backend.Machine.  
func (m *machine) SetCompiler(ctx backend.Compiler) {  	m.compiler = ctx -	m.regAllocFn = backend.NewRegAllocFunction[*instruction, *machine](m, ctx.SSABuilder(), ctx) +	m.regAllocFn.ssaB = ctx.SSABuilder()  }  func (m *machine) insert(i *instruction) { -	ectx := m.executableContext -	ectx.PendingInstructions = append(ectx.PendingInstructions, i) +	m.pendingInstructions = append(m.pendingInstructions, i)  }  func (m *machine) insertBrTargetLabel() label { @@ -183,18 +302,18 @@ func (m *machine) insertBrTargetLabel() label {  }  func (m *machine) allocateBrTarget() (nop *instruction, l label) { -	ectx := m.executableContext -	l = ectx.AllocateLabel() +	l = m.nextLabel +	m.nextLabel++  	nop = m.allocateInstr()  	nop.asNop0WithLabel(l) -	pos := ectx.GetOrAllocateLabelPosition(l) -	pos.Begin, pos.End = nop, nop +	pos := m.labelPositionPool.GetOrAllocate(int(l)) +	pos.begin, pos.end = nop, nop  	return  }  // allocateInstr allocates an instruction.  func (m *machine) allocateInstr() *instruction { -	instr := m.executableContext.InstructionPool.Allocate() +	instr := m.instrPool.Allocate()  	if !m.regAllocStarted {  		instr.addedBeforeRegAlloc = true  	} @@ -251,7 +370,6 @@ func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruc  // resolveRelativeAddresses resolves the relative addresses before encoding.  func (m *machine) resolveRelativeAddresses(ctx context.Context) { -	ectx := m.executableContext  	for {  		if len(m.unresolvedAddressModes) > 0 {  			arg0offset, ret0offset := m.arg0OffsetFromSP(), m.ret0OffsetFromSP() @@ -265,35 +383,36 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {  		var fn string  		var fnIndex int -		var labelToSSABlockID map[label]ssa.BasicBlockID +		var labelPosToLabel map[*labelPosition]label  		if wazevoapi.PerfMapEnabled { -			fn = wazevoapi.GetCurrentFunctionName(ctx) -			labelToSSABlockID = make(map[label]ssa.BasicBlockID) -			for i, l := range ectx.SsaBlockIDToLabels { -				labelToSSABlockID[l] = ssa.BasicBlockID(i) +			labelPosToLabel = make(map[*labelPosition]label) +			for i := 0; i <= m.labelPositionPool.MaxIDEncountered(); i++ { +				labelPosToLabel[m.labelPositionPool.Get(i)] = label(i)  			} + +			fn = wazevoapi.GetCurrentFunctionName(ctx)  			fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx)  		}  		// Next, in order to determine the offsets of relative jumps, we have to calculate the size of each label.  		var offset int64 -		for i, pos := range ectx.OrderedBlockLabels { -			pos.BinaryOffset = offset +		for i, pos := range m.orderedSSABlockLabelPos { +			pos.binaryOffset = offset  			var size int64 -			for cur := pos.Begin; ; cur = cur.next { +			for cur := pos.begin; ; cur = cur.next {  				switch cur.kind {  				case nop0:  					l := cur.nop0Label() -					if pos := ectx.LabelPositions[l]; pos != nil { -						pos.BinaryOffset = offset + size +					if pos := m.labelPositionPool.Get(int(l)); pos != nil { +						pos.binaryOffset = offset + size  					}  				case condBr:  					if !cur.condBrOffsetResolved() {  						var nextLabel label -						if i < len(ectx.OrderedBlockLabels)-1 { +						if i < len(m.orderedSSABlockLabelPos)-1 {  							// Note: this is only used when the block ends with fallthrough,  							// therefore can be safely assumed that the next block exists when it's needed. 
-							nextLabel = ectx.OrderedBlockLabels[i+1].L +							nextLabel = ssaBlockLabel(m.orderedSSABlockLabelPos[i+1].sb)  						}  						m.condBrRelocs = append(m.condBrRelocs, condBrReloc{  							cbr: cur, currentLabelPos: pos, offset: offset + size, @@ -302,21 +421,14 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {  					}  				}  				size += cur.size() -				if cur == pos.End { +				if cur == pos.end {  					break  				}  			}  			if wazevoapi.PerfMapEnabled {  				if size > 0 { -					l := pos.L -					var labelStr string -					if blkID, ok := labelToSSABlockID[l]; ok { -						labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID) -					} else { -						labelStr = l.String() -					} -					wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr)) +					wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelPosToLabel[pos]))  				}  			}  			offset += size @@ -330,7 +442,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {  			offset := reloc.offset  			target := cbr.condBrLabel() -			offsetOfTarget := ectx.LabelPositions[target].BinaryOffset +			offsetOfTarget := m.labelPositionPool.Get(int(target)).binaryOffset  			diff := offsetOfTarget - offset  			if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {  				// This case the conditional branch is too huge. We place the trampoline instructions at the end of the current block, @@ -351,11 +463,11 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {  	}  	var currentOffset int64 -	for cur := ectx.RootInstr; cur != nil; cur = cur.next { +	for cur := m.rootInstr; cur != nil; cur = cur.next {  		switch cur.kind {  		case br:  			target := cur.brLabel() -			offsetOfTarget := ectx.LabelPositions[target].BinaryOffset +			offsetOfTarget := m.labelPositionPool.Get(int(target)).binaryOffset  			diff := offsetOfTarget - currentOffset  			divided := diff >> 2  			if divided < minSignedInt26 || divided > maxSignedInt26 { @@ -366,7 +478,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {  		case condBr:  			if !cur.condBrOffsetResolved() {  				target := cur.condBrLabel() -				offsetOfTarget := ectx.LabelPositions[target].BinaryOffset +				offsetOfTarget := m.labelPositionPool.Get(int(target)).binaryOffset  				diff := offsetOfTarget - currentOffset  				if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {  					panic("BUG: branch relocation for large conditional branch larger than 19-bit range must be handled properly") @@ -378,7 +490,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {  			targets := m.jmpTableTargets[tableIndex]  			for i := range targets {  				l := label(targets[i]) -				offsetOfTarget := ectx.LabelPositions[l].BinaryOffset +				offsetOfTarget := m.labelPositionPool.Get(int(l)).binaryOffset  				diff := offsetOfTarget - (currentOffset + brTableSequenceOffsetTableBegin)  				targets[i] = uint32(diff)  			} @@ -399,7 +511,7 @@ const (  )  func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *labelPosition, nextLabel label) { -	cur := currentBlk.End +	cur := currentBlk.end  	originalTarget := cbr.condBrLabel()  	endNext := cur.next @@ -422,32 +534,27 @@ func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *  	cur = linkInstr(cur, br)  	// Update the end of the current block. 
-	currentBlk.End = cur +	currentBlk.end = cur  	linkInstr(cur, endNext)  }  // Format implements backend.Machine.  func (m *machine) Format() string { -	ectx := m.executableContext  	begins := map[*instruction]label{} -	for _, pos := range ectx.LabelPositions { +	for l := label(0); l < m.nextLabel; l++ { +		pos := m.labelPositionPool.Get(int(l))  		if pos != nil { -			begins[pos.Begin] = pos.L +			begins[pos.begin] = l  		}  	} -	irBlocks := map[label]ssa.BasicBlockID{} -	for i, l := range ectx.SsaBlockIDToLabels { -		irBlocks[l] = ssa.BasicBlockID(i) -	} -  	var lines []string -	for cur := ectx.RootInstr; cur != nil; cur = cur.next { +	for cur := m.rootInstr; cur != nil; cur = cur.next {  		if l, ok := begins[cur]; ok {  			var labelStr string -			if blkID, ok := irBlocks[l]; ok { -				labelStr = fmt.Sprintf("%s (SSA Block: %s):", l, blkID) +			if l <= m.maxSSABlockID { +				labelStr = fmt.Sprintf("%s (SSA Block: blk%d):", l, int(l))  			} else {  				labelStr = fmt.Sprintf("%s:", l)  			} @@ -508,13 +615,17 @@ func (m *machine) frameSize() int64 {  	return s  } -func (m *machine) addJmpTableTarget(targets []ssa.BasicBlock) (index int) { -	// TODO: reuse the slice! -	labels := make([]uint32, len(targets)) -	for j, target := range targets { -		labels[j] = uint32(m.executableContext.GetOrAllocateSSABlockLabel(target)) +func (m *machine) addJmpTableTarget(targets ssa.Values) (index int) { +	if m.jmpTableTargetsNext == len(m.jmpTableTargets) { +		m.jmpTableTargets = append(m.jmpTableTargets, make([]uint32, 0, len(targets.View()))) +	} + +	index = m.jmpTableTargetsNext +	m.jmpTableTargetsNext++ +	m.jmpTableTargets[index] = m.jmpTableTargets[index][:0] +	for _, targetBlockID := range targets.View() { +		target := m.compiler.SSABuilder().BasicBlock(ssa.BasicBlockID(targetBlockID)) +		m.jmpTableTargets[index] = append(m.jmpTableTargets[index], uint32(target.ID()))  	} -	index = len(m.jmpTableTargets) -	m.jmpTableTargets = append(m.jmpTableTargets, labels)  	return  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go index d9032f921..c646a8fab 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go @@ -15,9 +15,7 @@ func (m *machine) PostRegAlloc() {  // setupPrologue initializes the prologue of the function.  func (m *machine) setupPrologue() { -	ectx := m.executableContext - -	cur := ectx.RootInstr +	cur := m.rootInstr  	prevInitInst := cur.next  	// @@ -196,21 +194,20 @@ func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction {  // 1. Removes the redundant copy instruction.  // 2. Inserts the epilogue.  
func (m *machine) postRegAlloc() { -	ectx := m.executableContext -	for cur := ectx.RootInstr; cur != nil; cur = cur.next { +	for cur := m.rootInstr; cur != nil; cur = cur.next {  		switch cur.kind {  		case ret:  			m.setupEpilogueAfter(cur.prev)  		case loadConstBlockArg:  			lc := cur  			next := lc.next -			m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0] +			m.pendingInstructions = m.pendingInstructions[:0]  			m.lowerLoadConstantBlockArgAfterRegAlloc(lc) -			for _, instr := range m.executableContext.PendingInstructions { +			for _, instr := range m.pendingInstructions {  				cur = linkInstr(cur, instr)  			}  			linkInstr(cur, next) -			m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0] +			m.pendingInstructions = m.pendingInstructions[:0]  		default:  			// Removes the redundant copy instruction.  			if cur.IsCopy() && cur.rn.realReg() == cur.rd.RealReg() { @@ -432,11 +429,9 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi  // CompileStackGrowCallSequence implements backend.Machine.  func (m *machine) CompileStackGrowCallSequence() []byte { -	ectx := m.executableContext -  	cur := m.allocateInstr()  	cur.asNop0() -	ectx.RootInstr = cur +	m.rootInstr = cur  	// Save the callee saved and argument registers.  	cur = m.saveRegistersInExecutionContext(cur, saveRequiredRegs) @@ -458,16 +453,14 @@ func (m *machine) CompileStackGrowCallSequence() []byte {  	ret.asRet()  	linkInstr(cur, ret) -	m.encode(ectx.RootInstr) +	m.encode(m.rootInstr)  	return m.compiler.Buf()  }  func (m *machine) addsAddOrSubStackPointer(cur *instruction, rd regalloc.VReg, diff int64, add bool) *instruction { -	ectx := m.executableContext - -	ectx.PendingInstructions = ectx.PendingInstructions[:0] +	m.pendingInstructions = m.pendingInstructions[:0]  	m.insertAddOrSubStackPointer(rd, diff, add) -	for _, inserted := range ectx.PendingInstructions { +	for _, inserted := range m.pendingInstructions {  		cur = linkInstr(cur, inserted)  	}  	return cur diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go index c7eb92cc2..f2ed53ae5 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go @@ -3,18 +3,226 @@ package arm64  // This file implements the interfaces required for register allocations. See backend.RegAllocFunctionMachine.  import ( -	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"  	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"  	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"  ) -// ClobberedRegisters implements backend.RegAllocFunctionMachine. -func (m *machine) ClobberedRegisters(regs []regalloc.VReg) { -	m.clobberedRegs = append(m.clobberedRegs[:0], regs...) +// regAllocFn implements regalloc.Function. +type regAllocFn struct { +	ssaB                   ssa.Builder +	m                      *machine +	loopNestingForestRoots []ssa.BasicBlock +	blockIter              int  } -// Swap implements backend.RegAllocFunctionMachine. -func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) { +// PostOrderBlockIteratorBegin implements regalloc.Function. 
+func (f *regAllocFn) PostOrderBlockIteratorBegin() *labelPosition { +	f.blockIter = len(f.m.orderedSSABlockLabelPos) - 1 +	return f.PostOrderBlockIteratorNext() +} + +// PostOrderBlockIteratorNext implements regalloc.Function. +func (f *regAllocFn) PostOrderBlockIteratorNext() *labelPosition { +	if f.blockIter < 0 { +		return nil +	} +	b := f.m.orderedSSABlockLabelPos[f.blockIter] +	f.blockIter-- +	return b +} + +// ReversePostOrderBlockIteratorBegin implements regalloc.Function. +func (f *regAllocFn) ReversePostOrderBlockIteratorBegin() *labelPosition { +	f.blockIter = 0 +	return f.ReversePostOrderBlockIteratorNext() +} + +// ReversePostOrderBlockIteratorNext implements regalloc.Function. +func (f *regAllocFn) ReversePostOrderBlockIteratorNext() *labelPosition { +	if f.blockIter >= len(f.m.orderedSSABlockLabelPos) { +		return nil +	} +	b := f.m.orderedSSABlockLabelPos[f.blockIter] +	f.blockIter++ +	return b +} + +// ClobberedRegisters implements regalloc.Function. +func (f *regAllocFn) ClobberedRegisters(regs []regalloc.VReg) { +	f.m.clobberedRegs = append(f.m.clobberedRegs[:0], regs...) +} + +// LoopNestingForestRoots implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestRoots() int { +	f.loopNestingForestRoots = f.ssaB.LoopNestingForestRoots() +	return len(f.loopNestingForestRoots) +} + +// LoopNestingForestRoot implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestRoot(i int) *labelPosition { +	root := f.loopNestingForestRoots[i] +	pos := f.m.getOrAllocateSSABlockLabelPosition(root) +	return pos +} + +// LowestCommonAncestor implements regalloc.Function. +func (f *regAllocFn) LowestCommonAncestor(blk1, blk2 *labelPosition) *labelPosition { +	sb := f.ssaB.LowestCommonAncestor(blk1.sb, blk2.sb) +	pos := f.m.getOrAllocateSSABlockLabelPosition(sb) +	return pos +} + +// Idom implements regalloc.Function. +func (f *regAllocFn) Idom(blk *labelPosition) *labelPosition { +	sb := f.ssaB.Idom(blk.sb) +	pos := f.m.getOrAllocateSSABlockLabelPosition(sb) +	return pos +} + +// SwapBefore implements regalloc.Function. +func (f *regAllocFn) SwapBefore(x1, x2, tmp regalloc.VReg, instr *instruction) { +	f.m.swap(instr.prev, x1, x2, tmp) +} + +// StoreRegisterBefore implements regalloc.Function. +func (f *regAllocFn) StoreRegisterBefore(v regalloc.VReg, instr *instruction) { +	m := f.m +	m.insertStoreRegisterAt(v, instr, false) +} + +// StoreRegisterAfter implements regalloc.Function. +func (f *regAllocFn) StoreRegisterAfter(v regalloc.VReg, instr *instruction) { +	m := f.m +	m.insertStoreRegisterAt(v, instr, true) +} + +// ReloadRegisterBefore implements regalloc.Function. +func (f *regAllocFn) ReloadRegisterBefore(v regalloc.VReg, instr *instruction) { +	m := f.m +	m.insertReloadRegisterAt(v, instr, false) +} + +// ReloadRegisterAfter implements regalloc.Function. +func (f *regAllocFn) ReloadRegisterAfter(v regalloc.VReg, instr *instruction) { +	m := f.m +	m.insertReloadRegisterAt(v, instr, true) +} + +// InsertMoveBefore implements regalloc.Function. +func (f *regAllocFn) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { +	f.m.insertMoveBefore(dst, src, instr) +} + +// LoopNestingForestChild implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestChild(pos *labelPosition, i int) *labelPosition { +	childSB := pos.sb.LoopNestingForestChildren()[i] +	return f.m.getOrAllocateSSABlockLabelPosition(childSB) +} + +// Succ implements regalloc.Block. 
+func (f *regAllocFn) Succ(pos *labelPosition, i int) *labelPosition { +	succSB := pos.sb.Succ(i) +	if succSB.ReturnBlock() { +		return nil +	} +	return f.m.getOrAllocateSSABlockLabelPosition(succSB) +} + +// Pred implements regalloc.Block. +func (f *regAllocFn) Pred(pos *labelPosition, i int) *labelPosition { +	predSB := pos.sb.Pred(i) +	return f.m.getOrAllocateSSABlockLabelPosition(predSB) +} + +// BlockParams implements regalloc.Function. +func (f *regAllocFn) BlockParams(pos *labelPosition, regs *[]regalloc.VReg) []regalloc.VReg { +	c := f.m.compiler +	*regs = (*regs)[:0] +	for i := 0; i < pos.sb.Params(); i++ { +		v := c.VRegOf(pos.sb.Param(i)) +		*regs = append(*regs, v) +	} +	return *regs +} + +// ID implements regalloc.Block. +func (pos *labelPosition) ID() int32 { +	return int32(pos.sb.ID()) +} + +// InstrIteratorBegin implements regalloc.Block. +func (pos *labelPosition) InstrIteratorBegin() *instruction { +	ret := pos.begin +	pos.cur = ret +	return ret +} + +// InstrIteratorNext implements regalloc.Block. +func (pos *labelPosition) InstrIteratorNext() *instruction { +	for { +		if pos.cur == pos.end { +			return nil +		} +		instr := pos.cur.next +		pos.cur = instr +		if instr == nil { +			return nil +		} else if instr.addedBeforeRegAlloc { +			// Only concerned about the instruction added before regalloc. +			return instr +		} +	} +} + +// InstrRevIteratorBegin implements regalloc.Block. +func (pos *labelPosition) InstrRevIteratorBegin() *instruction { +	pos.cur = pos.end +	return pos.cur +} + +// InstrRevIteratorNext implements regalloc.Block. +func (pos *labelPosition) InstrRevIteratorNext() *instruction { +	for { +		if pos.cur == pos.begin { +			return nil +		} +		instr := pos.cur.prev +		pos.cur = instr +		if instr == nil { +			return nil +		} else if instr.addedBeforeRegAlloc { +			// Only concerned about the instruction added before regalloc. +			return instr +		} +	} +} + +// FirstInstr implements regalloc.Block. +func (pos *labelPosition) FirstInstr() *instruction { return pos.begin } + +// LastInstrForInsertion implements regalloc.Block. +func (pos *labelPosition) LastInstrForInsertion() *instruction { +	return lastInstrForInsertion(pos.begin, pos.end) +} + +// Preds implements regalloc.Block. +func (pos *labelPosition) Preds() int { return pos.sb.Preds() } + +// Entry implements regalloc.Block. +func (pos *labelPosition) Entry() bool { return pos.sb.EntryBlock() } + +// Succs implements regalloc.Block. +func (pos *labelPosition) Succs() int { return pos.sb.Succs() } + +// LoopHeader implements regalloc.Block. +func (pos *labelPosition) LoopHeader() bool { return pos.sb.LoopHeader() } + +// LoopNestingForestChildren implements regalloc.Block. +func (pos *labelPosition) LoopNestingForestChildren() int { +	return len(pos.sb.LoopNestingForestChildren()) +} + +func (m *machine) swap(cur *instruction, x1, x2, tmp regalloc.VReg) {  	prevNext := cur.next  	var mov1, mov2, mov3 *instruction  	if x1.RegType() == regalloc.RegTypeInt { @@ -32,12 +240,12 @@ func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) {  		if !tmp.Valid() {  			r2 := x2.RealReg()  			// Temporarily spill x1 to stack. -			cur = m.InsertStoreRegisterAt(x1, cur, true).prev +			cur = m.insertStoreRegisterAt(x1, cur, true).prev  			// Then move x2 to x1.  			cur = linkInstr(cur, m.allocateInstr().asFpuMov128(x1, x2))  			linkInstr(cur, prevNext)  			// Then reload the original value on x1 from stack to r2. 
-			m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true) +			m.insertReloadRegisterAt(x1.SetRealReg(r2), cur, true)  		} else {  			mov1 = m.allocateInstr().asFpuMov128(tmp, x1)  			mov2 = m.allocateInstr().asFpuMov128(x1, x2) @@ -50,8 +258,7 @@ func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) {  	}  } -// InsertMoveBefore implements backend.RegAllocFunctionMachine. -func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { +func (m *machine) insertMoveBefore(dst, src regalloc.VReg, instr *instruction) {  	typ := src.RegType()  	if typ != dst.RegType() {  		panic("BUG: src and dst must have the same type") @@ -70,13 +277,7 @@ func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) {  	linkInstr(cur, prevNext)  } -// SSABlockLabel implements backend.RegAllocFunctionMachine. -func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label { -	return m.executableContext.SsaBlockIDToLabels[id] -} - -// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine. -func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { +func (m *machine) insertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {  	if !v.IsRealReg() {  		panic("BUG: VReg must be backed by real reg to be stored")  	} @@ -100,8 +301,7 @@ func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, aft  	return linkInstr(cur, prevNext)  } -// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine. -func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { +func (m *machine) insertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {  	if !v.IsRealReg() {  		panic("BUG: VReg must be backed by real reg to be stored")  	} @@ -134,8 +334,7 @@ func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, af  	return linkInstr(cur, prevNext)  } -// LastInstrForInsertion implements backend.RegAllocFunctionMachine. -func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction { +func lastInstrForInsertion(begin, end *instruction) *instruction {  	cur := end  	for cur.kind == nop0 {  		cur = cur.prev diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go index edb0e36e3..a72b86f6b 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go @@ -14,7 +14,7 @@ func UnwindStack(sp, _, top uintptr, returnAddresses []uintptr) []uintptr {  	var stackBuf []byte  	{ -		// TODO: use unsafe.Slice after floor version is set to Go 1.20. +		//nolint:staticcheck  		hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf))  		hdr.Data = sp  		hdr.Len = l @@ -78,13 +78,7 @@ func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {  	//              +-----------------+ <---- stackPointerBeforeGoCall  	//                 (low address)  	ptr := unsafe.Pointer(stackPointerBeforeGoCall) +	data := (*uint64)(unsafe.Add(ptr, 16)) // skips the (frame_size, sliceSize).  	size := *(*uint64)(unsafe.Add(ptr, 8)) -	var view []uint64 -	{ -		sh := (*reflect.SliceHeader)(unsafe.Pointer(&view)) -		sh.Data = uintptr(unsafe.Add(ptr, 16)) // skips the (frame_size, sliceSize). 
-		sh.Len = int(size) -		sh.Cap = int(size) -	} -	return view +	return unsafe.Slice(data, size)  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/machine.go index 54ce89e46..9044a9e4b 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/machine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/machine.go @@ -11,7 +11,24 @@ import (  type (  	// Machine is a backend for a specific ISA machine.  	Machine interface { -		ExecutableContext() ExecutableContext +		// StartLoweringFunction is called when the compilation of the given function is started. +		// The maxBlockID is the maximum ssa.BasicBlockID in the function. +		StartLoweringFunction(maxBlockID ssa.BasicBlockID) + +		// LinkAdjacentBlocks is called after finished lowering all blocks in order to create one single instruction list. +		LinkAdjacentBlocks(prev, next ssa.BasicBlock) + +		// StartBlock is called when the compilation of the given block is started. +		// The order of this being called is the reverse post order of the ssa.BasicBlock(s) as we iterate with +		// ssa.Builder BlockIteratorReversePostOrderBegin and BlockIteratorReversePostOrderEnd. +		StartBlock(ssa.BasicBlock) + +		// EndBlock is called when the compilation of the current block is finished. +		EndBlock() + +		// FlushPendingInstructions flushes the pending instructions to the buffer. +		// This will be called after the lowering of each SSA Instruction. +		FlushPendingInstructions()  		// DisableStackCheck disables the stack check for the current compilation for debugging/testing.  		DisableStackCheck() diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go deleted file mode 100644 index 655370786..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go +++ /dev/null @@ -1,321 +0,0 @@ -package backend - -import ( -	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" -	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -// RegAllocFunctionMachine is the interface for the machine specific logic that will be used in RegAllocFunction. -type RegAllocFunctionMachine[I regalloc.InstrConstraint] interface { -	// InsertMoveBefore inserts the move instruction from src to dst before the given instruction. -	InsertMoveBefore(dst, src regalloc.VReg, instr I) -	// InsertStoreRegisterAt inserts the instruction(s) to store the given virtual register at the given instruction. -	// If after is true, the instruction(s) will be inserted after the given instruction, otherwise before. -	InsertStoreRegisterAt(v regalloc.VReg, instr I, after bool) I -	// InsertReloadRegisterAt inserts the instruction(s) to reload the given virtual register at the given instruction. -	// If after is true, the instruction(s) will be inserted after the given instruction, otherwise before. -	InsertReloadRegisterAt(v regalloc.VReg, instr I, after bool) I -	// ClobberedRegisters is called when the register allocation is done and the clobbered registers are known. -	ClobberedRegisters(regs []regalloc.VReg) -	// Swap swaps the two virtual registers after the given instruction. -	Swap(cur I, x1, x2, tmp regalloc.VReg) -	// LastInstrForInsertion implements LastInstrForInsertion of regalloc.Function. See its comment for details. 
-	LastInstrForInsertion(begin, end I) I -	// SSABlockLabel returns the label of the given ssa.BasicBlockID. -	SSABlockLabel(id ssa.BasicBlockID) Label -} - -type ( -	// RegAllocFunction implements regalloc.Function. -	RegAllocFunction[I regalloc.InstrConstraint, m RegAllocFunctionMachine[I]] struct { -		m   m -		ssb ssa.Builder -		c   Compiler -		// iter is the iterator for reversePostOrderBlocks -		iter                   int -		reversePostOrderBlocks []RegAllocBlock[I, m] -		// labelToRegAllocBlockIndex maps label to the index of reversePostOrderBlocks. -		labelToRegAllocBlockIndex [] /* Label to */ int -		loopNestingForestRoots    []ssa.BasicBlock -	} - -	// RegAllocBlock implements regalloc.Block. -	RegAllocBlock[I regalloc.InstrConstraint, m RegAllocFunctionMachine[I]] struct { -		// f is the function this instruction belongs to. Used to reuse the regAllocFunctionImpl.predsSlice slice for Defs() and Uses(). -		f                           *RegAllocFunction[I, m] -		sb                          ssa.BasicBlock -		l                           Label -		begin, end                  I -		loopNestingForestChildren   []ssa.BasicBlock -		cur                         I -		id                          int -		cachedLastInstrForInsertion I -	} -) - -// NewRegAllocFunction returns a new RegAllocFunction. -func NewRegAllocFunction[I regalloc.InstrConstraint, M RegAllocFunctionMachine[I]](m M, ssb ssa.Builder, c Compiler) *RegAllocFunction[I, M] { -	return &RegAllocFunction[I, M]{ -		m:   m, -		ssb: ssb, -		c:   c, -	} -} - -// AddBlock adds a new block to the function. -func (f *RegAllocFunction[I, M]) AddBlock(sb ssa.BasicBlock, l Label, begin, end I) { -	i := len(f.reversePostOrderBlocks) -	f.reversePostOrderBlocks = append(f.reversePostOrderBlocks, RegAllocBlock[I, M]{ -		f:     f, -		sb:    sb, -		l:     l, -		begin: begin, -		end:   end, -		id:    int(sb.ID()), -	}) -	if len(f.labelToRegAllocBlockIndex) <= int(l) { -		f.labelToRegAllocBlockIndex = append(f.labelToRegAllocBlockIndex, make([]int, int(l)-len(f.labelToRegAllocBlockIndex)+1)...) -	} -	f.labelToRegAllocBlockIndex[l] = i -} - -// Reset resets the function for the next compilation. -func (f *RegAllocFunction[I, M]) Reset() { -	f.reversePostOrderBlocks = f.reversePostOrderBlocks[:0] -	f.iter = 0 -} - -// StoreRegisterAfter implements regalloc.Function StoreRegisterAfter. -func (f *RegAllocFunction[I, M]) StoreRegisterAfter(v regalloc.VReg, instr regalloc.Instr) { -	m := f.m -	m.InsertStoreRegisterAt(v, instr.(I), true) -} - -// ReloadRegisterBefore implements regalloc.Function ReloadRegisterBefore. -func (f *RegAllocFunction[I, M]) ReloadRegisterBefore(v regalloc.VReg, instr regalloc.Instr) { -	m := f.m -	m.InsertReloadRegisterAt(v, instr.(I), false) -} - -// ReloadRegisterAfter implements regalloc.Function ReloadRegisterAfter. -func (f *RegAllocFunction[I, M]) ReloadRegisterAfter(v regalloc.VReg, instr regalloc.Instr) { -	m := f.m -	m.InsertReloadRegisterAt(v, instr.(I), true) -} - -// StoreRegisterBefore implements regalloc.Function StoreRegisterBefore. -func (f *RegAllocFunction[I, M]) StoreRegisterBefore(v regalloc.VReg, instr regalloc.Instr) { -	m := f.m -	m.InsertStoreRegisterAt(v, instr.(I), false) -} - -// ClobberedRegisters implements regalloc.Function ClobberedRegisters. -func (f *RegAllocFunction[I, M]) ClobberedRegisters(regs []regalloc.VReg) { -	f.m.ClobberedRegisters(regs) -} - -// SwapBefore implements regalloc.Function SwapBefore. 
-func (f *RegAllocFunction[I, M]) SwapBefore(x1, x2, tmp regalloc.VReg, instr regalloc.Instr) { -	f.m.Swap(instr.Prev().(I), x1, x2, tmp) -} - -// PostOrderBlockIteratorBegin implements regalloc.Function PostOrderBlockIteratorBegin. -func (f *RegAllocFunction[I, M]) PostOrderBlockIteratorBegin() regalloc.Block { -	f.iter = len(f.reversePostOrderBlocks) - 1 -	return f.PostOrderBlockIteratorNext() -} - -// PostOrderBlockIteratorNext implements regalloc.Function PostOrderBlockIteratorNext. -func (f *RegAllocFunction[I, M]) PostOrderBlockIteratorNext() regalloc.Block { -	if f.iter < 0 { -		return nil -	} -	b := &f.reversePostOrderBlocks[f.iter] -	f.iter-- -	return b -} - -// ReversePostOrderBlockIteratorBegin implements regalloc.Function ReversePostOrderBlockIteratorBegin. -func (f *RegAllocFunction[I, M]) ReversePostOrderBlockIteratorBegin() regalloc.Block { -	f.iter = 0 -	return f.ReversePostOrderBlockIteratorNext() -} - -// ReversePostOrderBlockIteratorNext implements regalloc.Function ReversePostOrderBlockIteratorNext. -func (f *RegAllocFunction[I, M]) ReversePostOrderBlockIteratorNext() regalloc.Block { -	if f.iter >= len(f.reversePostOrderBlocks) { -		return nil -	} -	b := &f.reversePostOrderBlocks[f.iter] -	f.iter++ -	return b -} - -// LoopNestingForestRoots implements regalloc.Function LoopNestingForestRoots. -func (f *RegAllocFunction[I, M]) LoopNestingForestRoots() int { -	f.loopNestingForestRoots = f.ssb.LoopNestingForestRoots() -	return len(f.loopNestingForestRoots) -} - -// LoopNestingForestRoot implements regalloc.Function LoopNestingForestRoot. -func (f *RegAllocFunction[I, M]) LoopNestingForestRoot(i int) regalloc.Block { -	blk := f.loopNestingForestRoots[i] -	l := f.m.SSABlockLabel(blk.ID()) -	index := f.labelToRegAllocBlockIndex[l] -	return &f.reversePostOrderBlocks[index] -} - -// InsertMoveBefore implements regalloc.Function InsertMoveBefore. -func (f *RegAllocFunction[I, M]) InsertMoveBefore(dst, src regalloc.VReg, instr regalloc.Instr) { -	f.m.InsertMoveBefore(dst, src, instr.(I)) -} - -// LowestCommonAncestor implements regalloc.Function LowestCommonAncestor. -func (f *RegAllocFunction[I, M]) LowestCommonAncestor(blk1, blk2 regalloc.Block) regalloc.Block { -	ret := f.ssb.LowestCommonAncestor(blk1.(*RegAllocBlock[I, M]).sb, blk2.(*RegAllocBlock[I, M]).sb) -	l := f.m.SSABlockLabel(ret.ID()) -	index := f.labelToRegAllocBlockIndex[l] -	return &f.reversePostOrderBlocks[index] -} - -// Idom implements regalloc.Function Idom. -func (f *RegAllocFunction[I, M]) Idom(blk regalloc.Block) regalloc.Block { -	builder := f.ssb -	idom := builder.Idom(blk.(*RegAllocBlock[I, M]).sb) -	if idom == nil { -		panic("BUG: idom must not be nil") -	} -	l := f.m.SSABlockLabel(idom.ID()) -	index := f.labelToRegAllocBlockIndex[l] -	return &f.reversePostOrderBlocks[index] -} - -// ID implements regalloc.Block. -func (r *RegAllocBlock[I, m]) ID() int32 { return int32(r.id) } - -// BlockParams implements regalloc.Block. -func (r *RegAllocBlock[I, m]) BlockParams(regs *[]regalloc.VReg) []regalloc.VReg { -	c := r.f.c -	*regs = (*regs)[:0] -	for i := 0; i < r.sb.Params(); i++ { -		v := c.VRegOf(r.sb.Param(i)) -		*regs = append(*regs, v) -	} -	return *regs -} - -// InstrIteratorBegin implements regalloc.Block. -func (r *RegAllocBlock[I, m]) InstrIteratorBegin() regalloc.Instr { -	r.cur = r.begin -	return r.cur -} - -// InstrIteratorNext implements regalloc.Block. 
-func (r *RegAllocBlock[I, m]) InstrIteratorNext() regalloc.Instr { -	for { -		if r.cur == r.end { -			return nil -		} -		instr := r.cur.Next() -		r.cur = instr.(I) -		if instr == nil { -			return nil -		} else if instr.AddedBeforeRegAlloc() { -			// Only concerned about the instruction added before regalloc. -			return instr -		} -	} -} - -// InstrRevIteratorBegin implements regalloc.Block. -func (r *RegAllocBlock[I, m]) InstrRevIteratorBegin() regalloc.Instr { -	r.cur = r.end -	return r.cur -} - -// InstrRevIteratorNext implements regalloc.Block. -func (r *RegAllocBlock[I, m]) InstrRevIteratorNext() regalloc.Instr { -	for { -		if r.cur == r.begin { -			return nil -		} -		instr := r.cur.Prev() -		r.cur = instr.(I) -		if instr == nil { -			return nil -		} else if instr.AddedBeforeRegAlloc() { -			// Only concerned about the instruction added before regalloc. -			return instr -		} -	} -} - -// FirstInstr implements regalloc.Block. -func (r *RegAllocBlock[I, m]) FirstInstr() regalloc.Instr { -	return r.begin -} - -// EndInstr implements regalloc.Block. -func (r *RegAllocBlock[I, m]) EndInstr() regalloc.Instr { -	return r.end -} - -// LastInstrForInsertion implements regalloc.Block. -func (r *RegAllocBlock[I, m]) LastInstrForInsertion() regalloc.Instr { -	var nil I -	if r.cachedLastInstrForInsertion == nil { -		r.cachedLastInstrForInsertion = r.f.m.LastInstrForInsertion(r.begin, r.end) -	} -	return r.cachedLastInstrForInsertion -} - -// Preds implements regalloc.Block. -func (r *RegAllocBlock[I, m]) Preds() int { return r.sb.Preds() } - -// Pred implements regalloc.Block. -func (r *RegAllocBlock[I, m]) Pred(i int) regalloc.Block { -	sb := r.sb -	pred := sb.Pred(i) -	l := r.f.m.SSABlockLabel(pred.ID()) -	index := r.f.labelToRegAllocBlockIndex[l] -	return &r.f.reversePostOrderBlocks[index] -} - -// Entry implements regalloc.Block. -func (r *RegAllocBlock[I, m]) Entry() bool { return r.sb.EntryBlock() } - -// Succs implements regalloc.Block. -func (r *RegAllocBlock[I, m]) Succs() int { -	return r.sb.Succs() -} - -// Succ implements regalloc.Block. -func (r *RegAllocBlock[I, m]) Succ(i int) regalloc.Block { -	sb := r.sb -	succ := sb.Succ(i) -	if succ.ReturnBlock() { -		return nil -	} -	l := r.f.m.SSABlockLabel(succ.ID()) -	index := r.f.labelToRegAllocBlockIndex[l] -	return &r.f.reversePostOrderBlocks[index] -} - -// LoopHeader implements regalloc.Block. -func (r *RegAllocBlock[I, m]) LoopHeader() bool { -	return r.sb.LoopHeader() -} - -// LoopNestingForestChildren implements regalloc.Block. -func (r *RegAllocBlock[I, m]) LoopNestingForestChildren() int { -	r.loopNestingForestChildren = r.sb.LoopNestingForestChildren() -	return len(r.loopNestingForestChildren) -} - -// LoopNestingForestChild implements regalloc.Block. 
-func (r *RegAllocBlock[I, m]) LoopNestingForestChild(i int) regalloc.Block { -	blk := r.loopNestingForestChildren[i] -	l := r.f.m.SSABlockLabel(blk.ID()) -	index := r.f.labelToRegAllocBlockIndex[l] -	return &r.f.reversePostOrderBlocks[index] -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go index 23157b478..5d15bd9dc 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go @@ -4,104 +4,100 @@ import "fmt"  // These interfaces are implemented by ISA-specific backends to abstract away the details, and allow the register  // allocators to work on any ISA. -// -// TODO: the interfaces are not stabilized yet, especially x64 will need some changes. E.g. x64 has an addressing mode -// 	where index can be in memory. That kind of info will be useful to reduce the register pressure, and should be leveraged -// 	by the register allocators, like https://docs.rs/regalloc2/latest/regalloc2/enum.OperandConstraint.html  type (  	// Function is the top-level interface to do register allocation, which corresponds to a CFG containing  	// Blocks(s). -	Function interface { +	// +	// I is the type of the instruction, and B is the type of the basic block. +	Function[I Instr, B Block[I]] interface {  		// PostOrderBlockIteratorBegin returns the first block in the post-order traversal of the CFG.  		// In other words, the last blocks in the CFG will be returned first. -		PostOrderBlockIteratorBegin() Block +		PostOrderBlockIteratorBegin() B  		// PostOrderBlockIteratorNext returns the next block in the post-order traversal of the CFG. -		PostOrderBlockIteratorNext() Block +		PostOrderBlockIteratorNext() B  		// ReversePostOrderBlockIteratorBegin returns the first block in the reverse post-order traversal of the CFG.  		// In other words, the first blocks in the CFG will be returned first. -		ReversePostOrderBlockIteratorBegin() Block +		ReversePostOrderBlockIteratorBegin() B  		// ReversePostOrderBlockIteratorNext returns the next block in the reverse post-order traversal of the CFG. -		ReversePostOrderBlockIteratorNext() Block +		ReversePostOrderBlockIteratorNext() B  		// ClobberedRegisters tell the clobbered registers by this function.  		ClobberedRegisters([]VReg)  		// LoopNestingForestRoots returns the number of roots of the loop nesting forest in a function.  		LoopNestingForestRoots() int  		// LoopNestingForestRoot returns the i-th root of the loop nesting forest in a function. -		LoopNestingForestRoot(i int) Block +		LoopNestingForestRoot(i int) B  		// LowestCommonAncestor returns the lowest common ancestor of two blocks in the dominator tree. -		LowestCommonAncestor(blk1, blk2 Block) Block +		LowestCommonAncestor(blk1, blk2 B) B  		// Idom returns the immediate dominator of the given block. -		Idom(blk Block) Block +		Idom(blk B) B + +		// LoopNestingForestChild returns the i-th child of the block in the loop nesting forest. +		LoopNestingForestChild(b B, i int) B +		// Pred returns the i-th predecessor of the block in the CFG. +		Pred(b B, i int) B +		// Succ returns the i-th successor of the block in the CFG. +		Succ(b B, i int) B +		// BlockParams returns the virtual registers used as the parameters of this block. +		BlockParams(B, *[]VReg) []VReg  		// Followings are for rewriting the function. 
-		// SwapAtEndOfBlock swaps the two virtual registers at the end of the given block. -		SwapBefore(x1, x2, tmp VReg, instr Instr) +		// SwapBefore swaps the two virtual registers at the end of the given block. +		SwapBefore(x1, x2, tmp VReg, instr I)  		// StoreRegisterBefore inserts store instruction(s) before the given instruction for the given virtual register. -		StoreRegisterBefore(v VReg, instr Instr) +		StoreRegisterBefore(v VReg, instr I)  		// StoreRegisterAfter inserts store instruction(s) after the given instruction for the given virtual register. -		StoreRegisterAfter(v VReg, instr Instr) +		StoreRegisterAfter(v VReg, instr I)  		// ReloadRegisterBefore inserts reload instruction(s) before the given instruction for the given virtual register. -		ReloadRegisterBefore(v VReg, instr Instr) +		ReloadRegisterBefore(v VReg, instr I)  		// ReloadRegisterAfter inserts reload instruction(s) after the given instruction for the given virtual register. -		ReloadRegisterAfter(v VReg, instr Instr) +		ReloadRegisterAfter(v VReg, instr I)  		// InsertMoveBefore inserts move instruction(s) before the given instruction for the given virtual registers. -		InsertMoveBefore(dst, src VReg, instr Instr) +		InsertMoveBefore(dst, src VReg, instr I)  	}  	// Block is a basic block in the CFG of a function, and it consists of multiple instructions, and predecessor Block(s). -	Block interface { +	// Right now, this corresponds to a ssa.BasicBlock lowered to the machine level. +	Block[I Instr] interface { +		comparable  		// ID returns the unique identifier of this block which is ordered in the reverse post-order traversal of the CFG.  		ID() int32 -		// BlockParams returns the virtual registers used as the parameters of this block. -		BlockParams(*[]VReg) []VReg  		// InstrIteratorBegin returns the first instruction in this block. Instructions added after lowering must be skipped.  		// Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr. -		InstrIteratorBegin() Instr +		InstrIteratorBegin() I  		// InstrIteratorNext returns the next instruction in this block. Instructions added after lowering must be skipped.  		// Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr. -		InstrIteratorNext() Instr +		InstrIteratorNext() I  		// InstrRevIteratorBegin is the same as InstrIteratorBegin, but in the reverse order. -		InstrRevIteratorBegin() Instr +		InstrRevIteratorBegin() I  		// InstrRevIteratorNext is the same as InstrIteratorNext, but in the reverse order. -		InstrRevIteratorNext() Instr +		InstrRevIteratorNext() I  		// FirstInstr returns the fist instruction in this block where instructions will be inserted after it. -		FirstInstr() Instr -		// EndInstr returns the end instruction in this block. -		EndInstr() Instr +		FirstInstr() I  		// LastInstrForInsertion returns the last instruction in this block where instructions will be inserted before it.  		// Such insertions only happen when we need to insert spill/reload instructions to adjust the merge edges.  		// At the time of register allocation, all the critical edges are already split, so there is no need  		// to worry about the case where branching instruction has multiple successors.  		// Therefore, usually, it is the nop instruction, but if the block ends with an unconditional branching, then it returns  		// the unconditional branch, not the nop. In other words it is either nop or unconditional branch. 
-		LastInstrForInsertion() Instr +		LastInstrForInsertion() I  		// Preds returns the number of predecessors of this block in the CFG.  		Preds() int -		// Pred returns the i-th predecessor of this block in the CFG. -		Pred(i int) Block  		// Entry returns true if the block is for the entry block.  		Entry() bool  		// Succs returns the number of successors of this block in the CFG.  		Succs() int -		// Succ returns the i-th successor of this block in the CFG. -		Succ(i int) Block  		// LoopHeader returns true if this block is a loop header.  		LoopHeader() bool  		// LoopNestingForestChildren returns the number of children of this block in the loop nesting forest.  		LoopNestingForestChildren() int -		// LoopNestingForestChild returns the i-th child of this block in the loop nesting forest. -		LoopNestingForestChild(i int) Block  	}  	// Instr is an instruction in a block, abstracting away the underlying ISA.  	Instr interface { +		comparable  		fmt.Stringer -		// Next returns the next instruction in the same block. -		Next() Instr -		// Prev returns the previous instruction in the same block. -		Prev() Instr  		// Defs returns the virtual registers defined by this instruction.  		Defs(*[]VReg) []VReg  		// Uses returns the virtual registers used by this instruction. @@ -124,13 +120,5 @@ type (  		IsIndirectCall() bool  		// IsReturn returns true if this instruction is a return instruction.  		IsReturn() bool -		// AddedBeforeRegAlloc returns true if this instruction is added before register allocation. -		AddedBeforeRegAlloc() bool -	} - -	// InstrConstraint is an interface for arch-specific instruction constraints. -	InstrConstraint interface { -		comparable -		Instr  	}  ) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go index eacb6a7ef..a5857f4f2 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go @@ -18,13 +18,13 @@ import (  )  // NewAllocator returns a new Allocator. -func NewAllocator(allocatableRegs *RegisterInfo) Allocator { -	a := Allocator{ +func NewAllocator[I Instr, B Block[I], F Function[I, B]](allocatableRegs *RegisterInfo) Allocator[I, B, F] { +	a := Allocator[I, B, F]{  		regInfo:            allocatableRegs, -		phiDefInstListPool: wazevoapi.NewPool[phiDefInstList](resetPhiDefInstList), -		blockStates:        wazevoapi.NewIDedPool[blockState](resetBlockState), +		phiDefInstListPool: wazevoapi.NewPool[phiDefInstList[I]](resetPhiDefInstList[I]), +		blockStates:        wazevoapi.NewIDedPool[blockState[I, B, F]](resetBlockState[I, B, F]),  	} -	a.state.vrStates = wazevoapi.NewIDedPool[vrState](resetVrState) +	a.state.vrStates = wazevoapi.NewIDedPool[vrState[I, B, F]](resetVrState[I, B, F])  	a.state.reset()  	for _, regs := range allocatableRegs.AllocatableRegisters {  		for _, r := range regs { @@ -49,32 +49,39 @@ type (  	}  	// Allocator is a register allocator. -	Allocator struct { +	Allocator[I Instr, B Block[I], F Function[I, B]] struct {  		// regInfo is static per ABI/ISA, and is initialized by the machine during Machine.PrepareRegisterAllocator.  		regInfo *RegisterInfo  		// allocatableSet is a set of allocatable RealReg derived from regInfo. Static per ABI/ISA.  		
allocatableSet           RegSet  		allocatedCalleeSavedRegs []VReg  		vs                       []VReg -		vs2                      []VRegID -		phiDefInstListPool       wazevoapi.Pool[phiDefInstList] +		ss                       []*vrState[I, B, F] +		copies                   []_copy[I, B, F] +		phiDefInstListPool       wazevoapi.Pool[phiDefInstList[I]]  		// Followings are re-used during various places. -		blks  []Block +		blks  []B  		reals []RealReg  		// Following two fields are updated while iterating the blocks in the reverse postorder. -		state       state -		blockStates wazevoapi.IDedPool[blockState] +		state       state[I, B, F] +		blockStates wazevoapi.IDedPool[blockState[I, B, F]] +	} + +	// _copy represents a source and destination pair of a copy instruction. +	_copy[I Instr, B Block[I], F Function[I, B]] struct { +		src   *vrState[I, B, F] +		dstID VRegID  	}  	// programCounter represents an opaque index into the program which is used to represents a LiveInterval of a VReg.  	programCounter int32 -	state struct { +	state[I Instr, B Block[I], F Function[I, B]] struct {  		argRealRegs []VReg -		regsInUse   regInUseSet -		vrStates    wazevoapi.IDedPool[vrState] +		regsInUse   regInUseSet[I, B, F] +		vrStates    wazevoapi.IDedPool[vrState[I, B, F]]  		currentBlockID int32 @@ -82,30 +89,30 @@ type (  		allocatedRegSet RegSet  	} -	blockState struct { +	blockState[I Instr, B Block[I], F Function[I, B]] struct {  		// liveIns is a list of VReg that are live at the beginning of the block. -		liveIns []VRegID +		liveIns []*vrState[I, B, F]  		// seen is true if the block is visited during the liveness analysis.  		seen bool  		// visited is true if the block is visited during the allocation phase.  		visited            bool  		startFromPredIndex int  		// startRegs is a list of RealReg that are used at the beginning of the block. This is used to fix the merge edges. -		startRegs regInUseSet +		startRegs regInUseSet[I, B, F]  		// endRegs is a list of RealReg that are used at the end of the block. This is used to fix the merge edges. -		endRegs regInUseSet +		endRegs regInUseSet[I, B, F]  	} -	vrState struct { +	vrState[I Instr, B Block[I], f Function[I, B]] struct {  		v VReg  		r RealReg  		// defInstr is the instruction that defines this value. If this is the phi value and not the entry block, this is nil. -		defInstr Instr +		defInstr I  		// defBlk is the block that defines this value. If this is the phi value, this is the block whose arguments contain this value. -		defBlk Block +		defBlk B  		// lca = lowest common ancestor. This is the block that is the lowest common ancestor of all the blocks that  		// reloads this value. This is used to determine the spill location. Only valid if spilled=true. -		lca Block +		lca B  		// lastUse is the program counter of the last use of this value. This changes while iterating the block, and  		// should not be used across the blocks as it becomes invalid. To check the validity, use lastUseUpdatedAtBlockID.  		lastUse                 programCounter @@ -120,14 +127,14 @@ type (  		desiredLoc desiredLoc  		// phiDefInstList is a list of instructions that defines this phi value.  		// This is used to determine the spill location, and only valid if isPhi=true. -		*phiDefInstList +		*phiDefInstList[I]  	}  	// phiDefInstList is a linked list of instructions that defines a phi value. 
-	phiDefInstList struct { -		instr Instr +	phiDefInstList[I Instr] struct { +		instr I  		v     VReg -		next  *phiDefInstList +		next  *phiDefInstList[I]  	}  	// desiredLoc represents a desired location for a VReg. @@ -159,13 +166,14 @@ func (d desiredLoc) stack() bool {  	return d&3 == desiredLoc(desiredLocKindStack)  } -func resetPhiDefInstList(l *phiDefInstList) { -	l.instr = nil +func resetPhiDefInstList[I Instr](l *phiDefInstList[I]) { +	var nilInstr I +	l.instr = nilInstr  	l.next = nil  	l.v = VRegInvalid  } -func (s *state) dump(info *RegisterInfo) { //nolint:unused +func (s *state[I, B, F]) dump(info *RegisterInfo) { //nolint:unused  	fmt.Println("\t\tstate:")  	fmt.Println("\t\t\targRealRegs:", s.argRealRegs)  	fmt.Println("\t\t\tregsInUse", s.regsInUse.format(info)) @@ -184,7 +192,7 @@ func (s *state) dump(info *RegisterInfo) { //nolint:unused  	fmt.Println("\t\t\tvrStates:", strings.Join(strs, ", "))  } -func (s *state) reset() { +func (s *state[I, B, F]) reset() {  	s.argRealRegs = s.argRealRegs[:0]  	s.vrStates.Reset()  	s.allocatedRegSet = RegSet(0) @@ -192,79 +200,74 @@ func (s *state) reset() {  	s.currentBlockID = -1  } -func (s *state) setVRegState(v VReg, r RealReg) { -	id := int(v.ID()) -	st := s.vrStates.GetOrAllocate(id) -	st.r = r -	st.v = v -} - -func resetVrState(vs *vrState) { +func resetVrState[I Instr, B Block[I], F Function[I, B]](vs *vrState[I, B, F]) {  	vs.v = VRegInvalid  	vs.r = RealRegInvalid -	vs.defInstr = nil -	vs.defBlk = nil +	var nilInstr I +	vs.defInstr = nilInstr +	var nilBlk B +	vs.defBlk = nilBlk  	vs.spilled = false  	vs.lastUse = -1  	vs.lastUseUpdatedAtBlockID = -1 -	vs.lca = nil +	vs.lca = nilBlk  	vs.isPhi = false  	vs.phiDefInstList = nil  	vs.desiredLoc = desiredLocUnspecified  } -func (s *state) getVRegState(v VRegID) *vrState { -	return s.vrStates.GetOrAllocate(int(v)) +func (s *state[I, B, F]) getOrAllocateVRegState(v VReg) *vrState[I, B, F] { +	st := s.vrStates.GetOrAllocate(int(v.ID())) +	if st.v == VRegInvalid { +		st.v = v +	} +	return st  } -func (s *state) useRealReg(r RealReg, v VReg) { -	if s.regsInUse.has(r) { -		panic("BUG: useRealReg: the given real register is already used") -	} -	s.regsInUse.add(r, v) -	s.setVRegState(v, r) +func (s *state[I, B, F]) getVRegState(v VRegID) *vrState[I, B, F] { +	return s.vrStates.Get(int(v)) +} + +func (s *state[I, B, F]) useRealReg(r RealReg, vr *vrState[I, B, F]) { +	s.regsInUse.add(r, vr) +	vr.r = r  	s.allocatedRegSet = s.allocatedRegSet.add(r)  } -func (s *state) releaseRealReg(r RealReg) { +func (s *state[I, B, F]) releaseRealReg(r RealReg) {  	current := s.regsInUse.get(r) -	if current.Valid() { +	if current != nil {  		s.regsInUse.remove(r) -		s.setVRegState(current, RealRegInvalid) +		current.r = RealRegInvalid  	}  }  // recordReload records that the given VReg is reloaded in the given block.  // This is used to determine the spill location by tracking the lowest common ancestor of all the blocks that reloads the value. 
-func (vs *vrState) recordReload(f Function, blk Block) { +func (vs *vrState[I, B, F]) recordReload(f F, blk B) {  	vs.spilled = true -	if vs.lca == nil { +	var nilBlk B +	if lca := vs.lca; lca == nilBlk {  		if wazevoapi.RegAllocLoggingEnabled {  			fmt.Printf("\t\tv%d is reloaded in blk%d,\n", vs.v.ID(), blk.ID())  		}  		vs.lca = blk -	} else { +	} else if lca != blk {  		if wazevoapi.RegAllocLoggingEnabled {  			fmt.Printf("\t\tv%d is reloaded in blk%d, lca=%d\n", vs.v.ID(), blk.ID(), vs.lca.ID())  		} -		vs.lca = f.LowestCommonAncestor(vs.lca, blk) +		vs.lca = f.LowestCommonAncestor(lca, blk)  		if wazevoapi.RegAllocLoggingEnabled {  			fmt.Printf("updated lca=%d\n", vs.lca.ID())  		}  	}  } -func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forbiddenMask RegSet, preferred RealReg) (r RealReg) { +func (a *Allocator[I, B, F]) findOrSpillAllocatable(s *state[I, B, F], allocatable []RealReg, forbiddenMask RegSet, preferred RealReg) (r RealReg) {  	r = RealRegInvalid  	// First, check if the preferredMask has any allocatable register.  	if preferred != RealRegInvalid && !forbiddenMask.has(preferred) && !s.regsInUse.has(preferred) { -		for _, candidateReal := range allocatable { -			// TODO: we should ensure the preferred register is in the allocatable set in the first place, -			//  but right now, just in case, we check it here. -			if candidateReal == preferred { -				return preferred -			} -		} +		return preferred  	}  	var lastUseAt programCounter @@ -275,7 +278,7 @@ func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forb  		}  		using := s.regsInUse.get(candidateReal) -		if using == VRegInvalid { +		if using == nil {  			// This is not used at this point.  			return candidateReal  		} @@ -284,17 +287,17 @@ func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forb  		// For example, if the register is used as an argument register, and it might be  		// spilled and not reloaded when it ends up being used as a temporary to pass  		// stack based argument. -		if using.IsRealReg() { +		if using.v.IsRealReg() {  			continue  		}  		isPreferred := candidateReal == preferred  		// last == -1 means the value won't be used anymore. 
-		if last := s.getVRegState(using.ID()).lastUse; r == RealRegInvalid || isPreferred || last == -1 || (lastUseAt != -1 && last > lastUseAt) { +		if last := using.lastUse; r == RealRegInvalid || isPreferred || last == -1 || (lastUseAt != -1 && last > lastUseAt) {  			lastUseAt = last  			r = candidateReal -			spillVReg = using +			spillVReg = using.v  			if isPreferred {  				break  			} @@ -312,7 +315,7 @@ func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forb  	return r  } -func (s *state) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) RealReg { +func (s *state[I, B, F]) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) RealReg {  	for _, r := range allocatable {  		if !s.regsInUse.has(r) && !forbiddenMask.has(r) {  			return r @@ -321,22 +324,20 @@ func (s *state) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) Rea  	return RealRegInvalid  } -func (s *state) resetAt(bs *blockState) { -	s.regsInUse.range_(func(_ RealReg, vr VReg) { -		s.setVRegState(vr, RealRegInvalid) +func (s *state[I, B, F]) resetAt(bs *blockState[I, B, F]) { +	s.regsInUse.range_(func(_ RealReg, vs *vrState[I, B, F]) { +		vs.r = RealRegInvalid  	})  	s.regsInUse.reset() -	bs.endRegs.range_(func(r RealReg, v VReg) { -		id := int(v.ID()) -		st := s.vrStates.GetOrAllocate(id) -		if st.lastUseUpdatedAtBlockID == s.currentBlockID && st.lastUse == programCounterLiveIn { -			s.regsInUse.add(r, v) -			s.setVRegState(v, r) +	bs.endRegs.range_(func(r RealReg, vs *vrState[I, B, F]) { +		if vs.lastUseUpdatedAtBlockID == s.currentBlockID && vs.lastUse == programCounterLiveIn { +			s.regsInUse.add(r, vs) +			vs.r = r  		}  	})  } -func resetBlockState(b *blockState) { +func resetBlockState[I Instr, B Block[I], F Function[I, B]](b *blockState[I, B, F]) {  	b.seen = false  	b.visited = false  	b.endRegs.reset() @@ -345,7 +346,7 @@ func resetBlockState(b *blockState) {  	b.liveIns = b.liveIns[:0]  } -func (b *blockState) dump(a *RegisterInfo) { +func (b *blockState[I, B, F]) dump(a *RegisterInfo) {  	fmt.Println("\t\tblockState:")  	fmt.Println("\t\t\tstartRegs:", b.startRegs.format(a))  	fmt.Println("\t\t\tendRegs:", b.endRegs.format(a)) @@ -354,13 +355,13 @@ func (b *blockState) dump(a *RegisterInfo) {  }  // DoAllocation performs register allocation on the given Function. -func (a *Allocator) DoAllocation(f Function) { +func (a *Allocator[I, B, F]) DoAllocation(f F) {  	a.livenessAnalysis(f)  	a.alloc(f)  	a.determineCalleeSavedRealRegs(f)  } -func (a *Allocator) determineCalleeSavedRealRegs(f Function) { +func (a *Allocator[I, B, F]) determineCalleeSavedRealRegs(f F) {  	a.allocatedCalleeSavedRegs = a.allocatedCalleeSavedRegs[:0]  	a.state.allocatedRegSet.Range(func(allocatedRealReg RealReg) {  		if a.regInfo.CalleeSavedRegisters.has(allocatedRealReg) { @@ -370,17 +371,17 @@ func (a *Allocator) determineCalleeSavedRealRegs(f Function) {  	f.ClobberedRegisters(a.allocatedCalleeSavedRegs)  } -func (a *Allocator) getOrAllocateBlockState(blockID int32) *blockState { +func (a *Allocator[I, B, F]) getOrAllocateBlockState(blockID int32) *blockState[I, B, F] {  	return a.blockStates.GetOrAllocate(int(blockID))  }  // phiBlk returns the block that defines the given phi value, nil otherwise. -func (s *state) phiBlk(v VRegID) Block { -	vs := s.getVRegState(v) +func (vs *vrState[I, B, F]) phiBlk() B {  	if vs.isPhi {  		return vs.defBlk  	} -	return nil +	var nilBlk B +	return nilBlk  }  const ( @@ -390,31 +391,35 @@ const (  // liveAnalysis constructs Allocator.blockLivenessData. 
 // The algorithm here is described in https://pfalcon.github.io/ssabook/latest/book-full.pdf Chapter 9.2. -func (a *Allocator) livenessAnalysis(f Function) { +func (a *Allocator[I, B, F]) livenessAnalysis(f F) {  	s := &a.state -	for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() { // Order doesn't matter. +	for i := VRegID(0); i < vRegIDReservedForRealNum; i++ { +		s.getOrAllocateVRegState(VReg(i).SetRealReg(RealReg(i))) +	} + +	var nilBlk B +	var nilInstr I +	for blk := f.PostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.PostOrderBlockIteratorNext() {  		// We should gather phi value data. -		for _, p := range blk.BlockParams(&a.vs) { -			vs := s.getVRegState(p.ID()) +		for _, p := range f.BlockParams(blk, &a.vs) { +			vs := s.getOrAllocateVRegState(p)  			vs.isPhi = true  			vs.defBlk = blk  		} -	} -	for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() {  		blkID := blk.ID()  		info := a.getOrAllocateBlockState(blkID) -		a.vs2 = a.vs2[:0] +		a.ss = a.ss[:0]  		const (  			flagDeleted = false  			flagLive    = true  		)  		ns := blk.Succs()  		for i := 0; i < ns; i++ { -			succ := blk.Succ(i) -			if succ == nil { +			succ := f.Succ(blk, i) +			if succ == nilBlk {  				continue  			} @@ -424,39 +429,39 @@ func (a *Allocator) livenessAnalysis(f Function) {  				continue  			} -			for _, v := range succInfo.liveIns { -				if s.phiBlk(v) != succ { -					st := s.getVRegState(v) +			for _, st := range succInfo.liveIns { +				if st.phiBlk() != succ && st.spilled != flagLive { //nolint:gosimple  					// We use .spilled field to store the flag.  					st.spilled = flagLive -					a.vs2 = append(a.vs2, v) +					a.ss = append(a.ss, st)  				}  			}  		} -		for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() { +		for instr := blk.InstrRevIteratorBegin(); instr != nilInstr; instr = blk.InstrRevIteratorNext() {  			var use, def VReg +			var defIsPhi bool  			for _, def = range instr.Defs(&a.vs) {  				if !def.IsRealReg() { -					id := def.ID() -					st := s.getVRegState(id) -					// We use .spilled field to store the flag. +					st := s.getOrAllocateVRegState(def) +					defIsPhi = st.isPhi +					// Note: We use .spilled field to store the flag.  					st.spilled = flagDeleted -					a.vs2 = append(a.vs2, id)  				}  			}  			for _, use = range instr.Uses(&a.vs) {  				if !use.IsRealReg() { -					id := use.ID() -					st := s.getVRegState(id) -					// We use .spilled field to store the flag. -					st.spilled = flagLive -					a.vs2 = append(a.vs2, id) +					st := s.getOrAllocateVRegState(use) +					// Note: We use .spilled field to store the flag. +					if st.spilled != flagLive { //nolint:gosimple +						st.spilled = flagLive +						a.ss = append(a.ss, st) +					}  				}  			} -			if def.Valid() && s.phiBlk(def.ID()) != nil { +			if defIsPhi {  				if use.Valid() && use.IsRealReg() {  					// If the destination is a phi value, and the source is a real register, this is the beginning of the function.  					a.state.argRealRegs = append(a.state.argRealRegs, use) @@ -464,11 +469,10 @@ func (a *Allocator) livenessAnalysis(f Function) {  			}  		} -		for _, v := range a.vs2 { -			st := s.getVRegState(v) +		for _, st := range a.ss {  			// We use .spilled field to store the flag.  			
if st.spilled == flagLive { //nolint:gosimple -				info.liveIns = append(info.liveIns, v) +				info.liveIns = append(info.liveIns, st)  				st.spilled = false  			}  		} @@ -479,51 +483,48 @@ func (a *Allocator) livenessAnalysis(f Function) {  	nrs := f.LoopNestingForestRoots()  	for i := 0; i < nrs; i++ {  		root := f.LoopNestingForestRoot(i) -		a.loopTreeDFS(root) +		a.loopTreeDFS(f, root)  	}  }  // loopTreeDFS implements the Algorithm 9.3 in the book in an iterative way. -func (a *Allocator) loopTreeDFS(entry Block) { +func (a *Allocator[I, B, F]) loopTreeDFS(f F, entry B) {  	a.blks = a.blks[:0]  	a.blks = append(a.blks, entry) -	s := &a.state  	for len(a.blks) > 0 {  		tail := len(a.blks) - 1  		loop := a.blks[tail]  		a.blks = a.blks[:tail] -		a.vs2 = a.vs2[:0] +		a.ss = a.ss[:0]  		const (  			flagDone    = false  			flagPending = true  		)  		info := a.getOrAllocateBlockState(loop.ID()) -		for _, v := range info.liveIns { -			if s.phiBlk(v) != loop { -				a.vs2 = append(a.vs2, v) -				st := s.getVRegState(v) +		for _, st := range info.liveIns { +			if st.phiBlk() != loop { +				a.ss = append(a.ss, st)  				// We use .spilled field to store the flag.  				st.spilled = flagPending  			}  		} -		var siblingAddedView []VRegID +		var siblingAddedView []*vrState[I, B, F]  		cn := loop.LoopNestingForestChildren()  		for i := 0; i < cn; i++ { -			child := loop.LoopNestingForestChild(i) +			child := f.LoopNestingForestChild(loop, i)  			childID := child.ID()  			childInfo := a.getOrAllocateBlockState(childID)  			if i == 0 {  				begin := len(childInfo.liveIns) -				for _, v := range a.vs2 { -					st := s.getVRegState(v) +				for _, st := range a.ss {  					// We use .spilled field to store the flag.  					if st.spilled == flagPending { //nolint:gosimple  						st.spilled = flagDone  						// TODO: deduplicate, though I don't think it has much impact. -						childInfo.liveIns = append(childInfo.liveIns, v) +						childInfo.liveIns = append(childInfo.liveIns, st)  					}  				}  				siblingAddedView = childInfo.liveIns[begin:] @@ -539,8 +540,7 @@ func (a *Allocator) loopTreeDFS(entry Block) {  		if cn == 0 {  			// If there's no forest child, we haven't cleared the .spilled field at this point. -			for _, v := range a.vs2 { -				st := s.getVRegState(v) +			for _, st := range a.ss {  				st.spilled = false  			}  		} @@ -557,37 +557,36 @@ func (a *Allocator) loopTreeDFS(entry Block) {  // the spill happens in the block that is the lowest common ancestor of all the blocks that reloads the value.  //  // All of these logics are almost the same as Go's compiler which has a dedicated description in the source file ^^. -func (a *Allocator) alloc(f Function) { +func (a *Allocator[I, B, F]) alloc(f F) {  	// First we allocate each block in the reverse postorder (at least one predecessor should be allocated for each block). -	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() { +	var nilBlk B +	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.ReversePostOrderBlockIteratorNext() {  		if wazevoapi.RegAllocLoggingEnabled {  			fmt.Printf("========== allocating blk%d ========\n", blk.ID())  		}  		if blk.Entry() { -			a.finalizeStartReg(blk) +			a.finalizeStartReg(f, blk)  		}  		a.allocBlock(f, blk)  	}  	// After the allocation, we all know the start and end state of each block. So we can fix the merge states. 
-	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() { +	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.ReversePostOrderBlockIteratorNext() {  		a.fixMergeState(f, blk)  	}  	// Finally, we insert the spill instructions as we know all the places where the reloads happen.  	a.scheduleSpills(f)  } -func (a *Allocator) updateLiveInVRState(liveness *blockState) { +func (a *Allocator[I, B, F]) updateLiveInVRState(liveness *blockState[I, B, F]) {  	currentBlockID := a.state.currentBlockID -	for _, v := range liveness.liveIns { -		vs := a.state.getVRegState(v) +	for _, vs := range liveness.liveIns {  		vs.lastUse = programCounterLiveIn  		vs.lastUseUpdatedAtBlockID = currentBlockID  	}  } -func (a *Allocator) finalizeStartReg(blk Block) { +func (a *Allocator[I, B, F]) finalizeStartReg(f F, blk B) {  	bID := blk.ID() -	liveness := a.getOrAllocateBlockState(bID)  	s := &a.state  	currentBlkState := a.getOrAllocateBlockState(bID)  	if currentBlkState.startFromPredIndex > -1 { @@ -595,20 +594,20 @@ func (a *Allocator) finalizeStartReg(blk Block) {  	}  	s.currentBlockID = bID -	a.updateLiveInVRState(liveness) +	a.updateLiveInVRState(currentBlkState)  	preds := blk.Preds() -	var predState *blockState +	var predState *blockState[I, B, F]  	switch preds {  	case 0: // This is the entry block.  	case 1: -		predID := blk.Pred(0).ID() +		predID := f.Pred(blk, 0).ID()  		predState = a.getOrAllocateBlockState(predID)  		currentBlkState.startFromPredIndex = 0  	default:  		// TODO: there should be some better heuristic to choose the predecessor.  		for i := 0; i < preds; i++ { -			predID := blk.Pred(i).ID() +			predID := f.Pred(blk, i).ID()  			if _predState := a.getOrAllocateBlockState(predID); _predState.visited {  				predState = _predState  				currentBlkState.startFromPredIndex = i @@ -621,18 +620,18 @@ func (a *Allocator) finalizeStartReg(blk Block) {  			panic(fmt.Sprintf("BUG: at lease one predecessor should be visited for blk%d", blk.ID()))  		}  		for _, u := range s.argRealRegs { -			s.useRealReg(u.RealReg(), u) +			s.useRealReg(u.RealReg(), s.getVRegState(u.ID()))  		}  		currentBlkState.startFromPredIndex = 0 -	} else if predState != nil { +	} else {  		if wazevoapi.RegAllocLoggingEnabled {  			fmt.Printf("allocating blk%d starting from blk%d (on index=%d) \n", -				bID, blk.Pred(currentBlkState.startFromPredIndex).ID(), currentBlkState.startFromPredIndex) +				bID, f.Pred(blk, currentBlkState.startFromPredIndex).ID(), currentBlkState.startFromPredIndex)  		}  		s.resetAt(predState)  	} -	s.regsInUse.range_(func(allocated RealReg, v VReg) { +	s.regsInUse.range_(func(allocated RealReg, v *vrState[I, B, F]) {  		currentBlkState.startRegs.add(allocated, v)  	})  	if wazevoapi.RegAllocLoggingEnabled { @@ -640,7 +639,7 @@ func (a *Allocator) finalizeStartReg(blk Block) {  	}  } -func (a *Allocator) allocBlock(f Function, blk Block) { +func (a *Allocator[I, B, F]) allocBlock(f F, blk B) {  	bID := blk.ID()  	s := &a.state  	currentBlkState := a.getOrAllocateBlockState(bID) @@ -651,36 +650,34 @@ func (a *Allocator) allocBlock(f Function, blk Block) {  	}  	// Clears the previous state. -	s.regsInUse.range_(func(allocatedRealReg RealReg, vr VReg) { -		s.setVRegState(vr, RealRegInvalid) -	}) +	s.regsInUse.range_(func(allocatedRealReg RealReg, vr *vrState[I, B, F]) { vr.r = RealRegInvalid })  	s.regsInUse.reset()  	// Then set the start state. 
-	currentBlkState.startRegs.range_(func(allocatedRealReg RealReg, vr VReg) { -		s.useRealReg(allocatedRealReg, vr) -	}) +	currentBlkState.startRegs.range_(func(allocatedRealReg RealReg, vr *vrState[I, B, F]) { s.useRealReg(allocatedRealReg, vr) }) -	desiredUpdated := a.vs2[:0] +	desiredUpdated := a.ss[:0]  	// Update the last use of each VReg. +	a.copies = a.copies[:0] // Stores the copy instructions.  	var pc programCounter -	for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() { -		var use, def VReg -		for _, use = range instr.Uses(&a.vs) { +	var nilInstr I +	for instr := blk.InstrIteratorBegin(); instr != nilInstr; instr = blk.InstrIteratorNext() { +		var useState *vrState[I, B, F] +		for _, use := range instr.Uses(&a.vs) { +			useState = s.getVRegState(use.ID())  			if !use.IsRealReg() { -				s.getVRegState(use.ID()).lastUse = pc +				useState.lastUse = pc  			}  		}  		if instr.IsCopy() { -			def = instr.Defs(&a.vs)[0] +			def := instr.Defs(&a.vs)[0] +			a.copies = append(a.copies, _copy[I, B, F]{src: useState, dstID: def.ID()})  			r := def.RealReg()  			if r != RealRegInvalid { -				useID := use.ID() -				vs := s.getVRegState(useID) -				if !vs.isPhi { // TODO: no idea why do we need this. -					vs.desiredLoc = newDesiredLocReg(r) -					desiredUpdated = append(desiredUpdated, useID) +				if !useState.isPhi { // TODO: no idea why do we need this. +					useState.desiredLoc = newDesiredLocReg(r) +					desiredUpdated = append(desiredUpdated, useState)  				}  			}  		} @@ -689,18 +686,18 @@ func (a *Allocator) allocBlock(f Function, blk Block) {  	// Mark all live-out values by checking live-in of the successors.  	// While doing so, we also update the desired register values. -	var succ Block +	var succ B +	var nilBlk B  	for i, ns := 0, blk.Succs(); i < ns; i++ { -		succ = blk.Succ(i) -		if succ == nil { +		succ = f.Succ(blk, i) +		if succ == nilBlk {  			continue  		}  		succID := succ.ID()  		succState := a.getOrAllocateBlockState(succID) -		for _, v := range succState.liveIns { -			if s.phiBlk(v) != succ { -				st := s.getVRegState(v) +		for _, st := range succState.liveIns { +			if st.phiBlk() != succ {  				st.lastUse = programCounterLiveOut  			}  		} @@ -709,43 +706,33 @@ func (a *Allocator) allocBlock(f Function, blk Block) {  			if wazevoapi.RegAllocLoggingEnabled {  				fmt.Printf("blk%d -> blk%d: start_regs: %s\n", bID, succID, succState.startRegs.format(a.regInfo))  			} -			succState.startRegs.range_(func(allocatedRealReg RealReg, vr VReg) { -				vs := s.getVRegState(vr.ID()) +			succState.startRegs.range_(func(allocatedRealReg RealReg, vs *vrState[I, B, F]) {  				vs.desiredLoc = newDesiredLocReg(allocatedRealReg) -				desiredUpdated = append(desiredUpdated, vr.ID()) +				desiredUpdated = append(desiredUpdated, vs)  			}) -			for _, p := range succ.BlockParams(&a.vs) { +			for _, p := range f.BlockParams(succ, &a.vs) {  				vs := s.getVRegState(p.ID())  				if vs.desiredLoc.realReg() == RealRegInvalid {  					vs.desiredLoc = desiredLocStack -					desiredUpdated = append(desiredUpdated, p.ID()) +					desiredUpdated = append(desiredUpdated, vs)  				}  			}  		}  	}  	// Propagate the desired register values from the end of the block to the beginning. 
-	for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() { -		if instr.IsCopy() { -			def := instr.Defs(&a.vs)[0] -			defState := s.getVRegState(def.ID()) -			desired := defState.desiredLoc.realReg() -			if desired == RealRegInvalid { -				continue -			} - -			use := instr.Uses(&a.vs)[0] -			useID := use.ID() -			useState := s.getVRegState(useID) -			if s.phiBlk(useID) != succ && useState.desiredLoc == desiredLocUnspecified { -				useState.desiredLoc = newDesiredLocReg(desired) -				desiredUpdated = append(desiredUpdated, useID) -			} +	for _, instr := range a.copies { +		defState := s.getVRegState(instr.dstID) +		desired := defState.desiredLoc.realReg() +		useState := instr.src +		if useState.phiBlk() != succ && useState.desiredLoc == desiredLocUnspecified { +			useState.desiredLoc = newDesiredLocReg(desired) +			desiredUpdated = append(desiredUpdated, useState)  		}  	}  	pc = 0 -	for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() { +	for instr := blk.InstrIteratorBegin(); instr != nilInstr; instr = blk.InstrIteratorNext() {  		if wazevoapi.RegAllocLoggingEnabled {  			fmt.Println(instr)  		} @@ -777,12 +764,12 @@ func (a *Allocator) allocBlock(f Function, blk Block) {  				r := vs.r  				if r == RealRegInvalid { -					r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[use.RegType()], currentUsedSet, +					r = a.findOrSpillAllocatable(s, a.regInfo.AllocatableRegisters[use.RegType()], currentUsedSet,  						// Prefer the desired register if it's available.  						vs.desiredLoc.realReg())  					vs.recordReload(f, blk)  					f.ReloadRegisterBefore(use.SetRealReg(r), instr) -					s.useRealReg(r, use) +					s.useRealReg(r, vs)  				}  				if wazevoapi.RegAllocLoggingEnabled {  					fmt.Printf("\ttrying to use v%v on %s\n", use.ID(), a.regInfo.RealRegName(r)) @@ -799,10 +786,9 @@ func (a *Allocator) allocBlock(f Function, blk Block) {  		}  		isIndirect := instr.IsIndirectCall() -		call := instr.IsCall() || isIndirect -		if call { +		if instr.IsCall() || isIndirect {  			addr := RealRegInvalid -			if instr.IsIndirectCall() { +			if isIndirect {  				addr = a.vs[0].RealReg()  			}  			a.releaseCallerSavedRegs(addr) @@ -814,8 +800,8 @@ func (a *Allocator) allocBlock(f Function, blk Block) {  		a.reals = killSet  		defs := instr.Defs(&a.vs) -		switch { -		case len(defs) > 1: +		switch len(defs) { +		default:  			// Some instructions define multiple values on real registers.  			// E.g. call instructions (following calling convention) / div instruction on x64 that defines both rax and rdx.  			
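When a use has no register, the loop above asks findOrSpillAllocatable for one, preferring the desired-register hint. A rough standalone sketch of that preference order over bitmask register sets (the real helper also performs the spill; here 0xff stands in for RealRegInvalid):

// pickReg chooses the desired register if it is allocatable and free,
// otherwise any free allocatable register, otherwise 0xff meaning
// "spill something first".
func pickReg(allocatable, inUse uint64, desired uint8) uint8 {
	free := allocatable &^ inUse
	if desired != 0xff && free&(1<<desired) != 0 {
		return desired
	}
	for r := uint8(0); r < 64; r++ {
		if free&(1<<r) != 0 {
			return r
		}
	}
	return 0xff
}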
// @@ -830,20 +816,21 @@ func (a *Allocator) allocBlock(f Function, blk Block) {  				if s.regsInUse.has(r) {  					s.releaseRealReg(r)  				} -				s.useRealReg(r, def) +				s.useRealReg(r, s.getVRegState(def.ID()))  			} -		case len(defs) == 1: +		case 0: +		case 1:  			def := defs[0] +			vState := s.getVRegState(def.ID())  			if def.IsRealReg() {  				r := def.RealReg()  				if a.allocatableSet.has(r) {  					if s.regsInUse.has(r) {  						s.releaseRealReg(r)  					} -					s.useRealReg(r, def) +					s.useRealReg(r, vState)  				}  			} else { -				vState := s.getVRegState(def.ID())  				r := vState.r  				if desired := vState.desiredLoc.realReg(); desired != RealRegInvalid { @@ -864,7 +851,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) {  							}  							r = desired  							s.releaseRealReg(r) -							s.useRealReg(r, def) +							s.useRealReg(r, vState)  						}  					}  				} @@ -880,9 +867,9 @@ func (a *Allocator) allocBlock(f Function, blk Block) {  					}  					if r == RealRegInvalid {  						typ := def.RegType() -						r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[typ], RegSet(0), RealRegInvalid) +						r = a.findOrSpillAllocatable(s, a.regInfo.AllocatableRegisters[typ], RegSet(0), RealRegInvalid)  					} -					s.useRealReg(r, def) +					s.useRealReg(r, vState)  				}  				dr := def.SetRealReg(r)  				instr.AssignDef(dr) @@ -915,9 +902,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) {  		pc++  	} -	s.regsInUse.range_(func(allocated RealReg, v VReg) { -		currentBlkState.endRegs.add(allocated, v) -	}) +	s.regsInUse.range_(func(allocated RealReg, v *vrState[I, B, F]) { currentBlkState.endRegs.add(allocated, v) })  	currentBlkState.visited = true  	if wazevoapi.RegAllocLoggingEnabled { @@ -925,31 +910,30 @@ func (a *Allocator) allocBlock(f Function, blk Block) {  	}  	// Reset the desired end location. -	for _, v := range desiredUpdated { -		vs := s.getVRegState(v) +	for _, vs := range desiredUpdated {  		vs.desiredLoc = desiredLocUnspecified  	} -	a.vs2 = desiredUpdated[:0] +	a.ss = desiredUpdated[:0]  	for i := 0; i < blk.Succs(); i++ { -		succ := blk.Succ(i) -		if succ == nil { +		succ := f.Succ(blk, i) +		if succ == nilBlk {  			continue  		}  		// If the successor is not visited yet, finalize the start state. -		a.finalizeStartReg(succ) +		a.finalizeStartReg(f, succ)  	}  } -func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) { +func (a *Allocator[I, B, F]) releaseCallerSavedRegs(addrReg RealReg) {  	s := &a.state  	for allocated := RealReg(0); allocated < 64; allocated++ {  		if allocated == addrReg { // If this is the call indirect, we should not touch the addr register.  			continue  		} -		if v := s.regsInUse.get(allocated); v.Valid() { -			if v.IsRealReg() { +		if vs := s.regsInUse.get(allocated); vs != nil { +			if vs.v.IsRealReg() {  				continue // This is the argument register as it's already used by VReg backed by the corresponding RealReg.  			
}  			if !a.regInfo.CallerSavedRegisters.has(allocated) { @@ -961,7 +945,7 @@ func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) {  	}  } -func (a *Allocator) fixMergeState(f Function, blk Block) { +func (a *Allocator[I, B, F]) fixMergeState(f F, blk B) {  	preds := blk.Preds()  	if preds <= 1 {  		return @@ -975,7 +959,7 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {  	desiredOccupants := &blkSt.startRegs  	var desiredOccupantsSet RegSet  	for i, v := range desiredOccupants { -		if v != VRegInvalid { +		if v != nil {  			desiredOccupantsSet = desiredOccupantsSet.add(RealReg(i))  		}  	} @@ -992,7 +976,7 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {  			continue  		} -		pred := blk.Pred(i) +		pred := f.Pred(blk, i)  		predSt := a.getOrAllocateBlockState(pred.ID())  		s.resetAt(predSt) @@ -1012,16 +996,16 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {  		for r := RealReg(0); r < 64; r++ {  			desiredVReg := desiredOccupants.get(r) -			if !desiredVReg.Valid() { +			if desiredVReg == nil {  				continue  			}  			currentVReg := s.regsInUse.get(r) -			if desiredVReg.ID() == currentVReg.ID() { +			if currentVReg != nil && desiredVReg.v.ID() == currentVReg.v.ID() {  				continue  			} -			typ := desiredVReg.RegType() +			typ := desiredVReg.v.RegType()  			var tmpRealReg VReg  			if typ == RegTypeInt {  				tmpRealReg = intTmp @@ -1039,13 +1023,18 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {  //   - desiredVReg is the desired VReg value that should be on the register `r`.  //   - freeReg is the temporary register that can be used to swap the values, which may or may not be used.  //   - typ is the register type of the `r`. -func (a *Allocator) reconcileEdge(f Function, +func (a *Allocator[I, B, F]) reconcileEdge(f F,  	r RealReg, -	pred Block, -	currentVReg, desiredVReg VReg, +	pred B, +	currentState, desiredState *vrState[I, B, F],  	freeReg VReg,  	typ RegType,  ) { +	desiredVReg := desiredState.v +	currentVReg := VRegInvalid +	if currentState != nil { +		currentVReg = currentState.v +	}  	// There are four cases to consider:  	// 1. currentVReg is valid, but desiredVReg is on the stack.  	// 2. Both currentVReg and desiredVReg are valid. @@ -1054,7 +1043,6 @@ func (a *Allocator) reconcileEdge(f Function,  	s := &a.state  	if currentVReg.Valid() { -		desiredState := s.getVRegState(desiredVReg.ID())  		er := desiredState.r  		if er == RealRegInvalid {  			// Case 1: currentVReg is valid, but desiredVReg is on the stack. @@ -1068,9 +1056,9 @@ func (a *Allocator) reconcileEdge(f Function,  			f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion())  			s.releaseRealReg(r) -			s.getVRegState(desiredVReg.ID()).recordReload(f, pred) +			desiredState.recordReload(f, pred)  			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion()) -			s.useRealReg(r, desiredVReg) +			s.useRealReg(r, desiredState)  			return  		} else {  			// Case 2: Both currentVReg and desiredVReg are valid. 
@@ -1089,8 +1077,8 @@ func (a *Allocator) reconcileEdge(f Function,  			s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg())  			s.releaseRealReg(r)  			s.releaseRealReg(er) -			s.useRealReg(r, desiredVReg) -			s.useRealReg(er, currentVReg) +			s.useRealReg(r, desiredState) +			s.useRealReg(er, currentState)  			if wazevoapi.RegAllocLoggingEnabled {  				fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er))  			} @@ -1101,7 +1089,7 @@ func (a *Allocator) reconcileEdge(f Function,  				desiredVReg.ID(), a.regInfo.RealRegName(r),  			)  		} -		if currentReg := s.getVRegState(desiredVReg.ID()).r; currentReg != RealRegInvalid { +		if currentReg := desiredState.r; currentReg != RealRegInvalid {  			// Case 3: Desired is on a different register than `r` and currentReg is not valid.  			// We simply need to move the desired value to the register.  			f.InsertMoveBefore( @@ -1113,14 +1101,14 @@ func (a *Allocator) reconcileEdge(f Function,  		} else {  			// Case 4: Both currentVReg and desiredVReg are not valid.  			// We simply need to reload the desired value into the register. -			s.getVRegState(desiredVReg.ID()).recordReload(f, pred) +			desiredState.recordReload(f, pred)  			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())  		} -		s.useRealReg(r, desiredVReg) +		s.useRealReg(r, desiredState)  	}  } -func (a *Allocator) scheduleSpills(f Function) { +func (a *Allocator[I, B, F]) scheduleSpills(f F) {  	states := a.state.vrStates  	for i := 0; i <= states.MaxIDEncountered(); i++ {  		vs := states.Get(i) @@ -1133,7 +1121,7 @@ func (a *Allocator) scheduleSpills(f Function) {  	}  } -func (a *Allocator) scheduleSpill(f Function, vs *vrState) { +func (a *Allocator[I, B, F]) scheduleSpill(f F, vs *vrState[I, B, F]) {  	v := vs.v  	// If the value is the phi value, we need to insert a spill after each phi definition.  	if vs.isPhi { @@ -1146,10 +1134,11 @@ func (a *Allocator) scheduleSpill(f Function, vs *vrState) {  	pos := vs.lca  	definingBlk := vs.defBlk  	r := RealRegInvalid -	if definingBlk == nil { +	var nilBlk B +	if definingBlk == nilBlk {  		panic(fmt.Sprintf("BUG: definingBlk should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String()))  	} -	if pos == nil { +	if pos == nilBlk {  		panic(fmt.Sprintf("BUG: pos should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String()))  	} @@ -1159,7 +1148,7 @@ func (a *Allocator) scheduleSpill(f Function, vs *vrState) {  	for pos != definingBlk {  		st := a.getOrAllocateBlockState(pos.ID())  		for rr := RealReg(0); rr < 64; rr++ { -			if st.startRegs.get(rr) == v { +			if vs := st.startRegs.get(rr); vs != nil && vs.v == v {  				r = rr  				// Already in the register, so we can place the spill at the beginning of the block.  				break @@ -1192,7 +1181,7 @@ func (a *Allocator) scheduleSpill(f Function, vs *vrState) {  }  // Reset resets the allocator's internal state so that it can be reused. 
-func (a *Allocator) Reset() { +func (a *Allocator[I, B, F]) Reset() {  	a.state.reset()  	a.blockStates.Reset()  	a.phiDefInstListPool.Reset() diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go index 04a8e8f4d..ce84c9c0c 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go @@ -46,52 +46,50 @@ func (rs RegSet) Range(f func(allocatedRealReg RealReg)) {  	}  } -type regInUseSet [64]VReg +type regInUseSet[I Instr, B Block[I], F Function[I, B]] [64]*vrState[I, B, F] -func newRegInUseSet() regInUseSet { -	var ret regInUseSet +func newRegInUseSet[I Instr, B Block[I], F Function[I, B]]() regInUseSet[I, B, F] { +	var ret regInUseSet[I, B, F]  	ret.reset()  	return ret  } -func (rs *regInUseSet) reset() { -	for i := range rs { -		rs[i] = VRegInvalid -	} +func (rs *regInUseSet[I, B, F]) reset() { +	clear(rs[:])  } -func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused +func (rs *regInUseSet[I, B, F]) format(info *RegisterInfo) string { //nolint:unused  	var ret []string  	for i, vr := range rs { -		if vr != VRegInvalid { -			ret = append(ret, fmt.Sprintf("(%s->v%d)", info.RealRegName(RealReg(i)), vr.ID())) +		if vr != nil { +			ret = append(ret, fmt.Sprintf("(%s->v%d)", info.RealRegName(RealReg(i)), vr.v.ID()))  		}  	}  	return strings.Join(ret, ", ")  } -func (rs *regInUseSet) has(r RealReg) bool { -	return r < 64 && rs[r] != VRegInvalid +func (rs *regInUseSet[I, B, F]) has(r RealReg) bool { +	return r < 64 && rs[r] != nil  } -func (rs *regInUseSet) get(r RealReg) VReg { +func (rs *regInUseSet[I, B, F]) get(r RealReg) *vrState[I, B, F] {  	return rs[r]  } -func (rs *regInUseSet) remove(r RealReg) { -	rs[r] = VRegInvalid +func (rs *regInUseSet[I, B, F]) remove(r RealReg) { +	rs[r] = nil  } -func (rs *regInUseSet) add(r RealReg, vr VReg) { +func (rs *regInUseSet[I, B, F]) add(r RealReg, vr *vrState[I, B, F]) {  	if r >= 64 {  		return  	}  	rs[r] = vr  } -func (rs *regInUseSet) range_(f func(allocatedRealReg RealReg, vr VReg)) { +func (rs *regInUseSet[I, B, F]) range_(f func(allocatedRealReg RealReg, vr *vrState[I, B, F])) {  	for i, vr := range rs { -		if vr != VRegInvalid { +		if vr != nil {  			f(RealReg(i), vr)  		}  	} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/vdef.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/vdef.go index edfa962b5..47a275a3a 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/vdef.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/vdef.go @@ -1,43 +1,19 @@  package backend  import ( -	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"  	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"  )  // SSAValueDefinition represents a definition of an SSA value.  type SSAValueDefinition struct { -	// BlockParamValue is valid if Instr == nil -	BlockParamValue ssa.Value - -	// BlkParamVReg is valid if Instr == nil -	BlkParamVReg regalloc.VReg - +	V ssa.Value  	// Instr is not nil if this is a definition from an instruction.  	Instr *ssa.Instruction -	// N is the index of the return value in the instr's return values list. -	N int  	// RefCount is the number of references to the result. 
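regInUseSet now stores *vrState pointers instead of VReg values, so a free slot is simply nil and reset collapses to one clear call (a Go 1.21 builtin) rather than a loop writing VRegInvalid. The same shape in isolation, with a generic placeholder element type:

// slotTable is a fixed 64-entry table keyed by real-register number; a nil
// entry means the register is free, so reset is a single clear() call.
type slotTable[T any] [64]*T

func (t *slotTable[T]) reset() { clear(t[:]) } // Go 1.21+: nils every slot

func (t *slotTable[T]) has(i uint8) bool { return i < 64 && t[i] != nil }

func (t *slotTable[T]) add(i uint8, v *T) {
	if i < 64 {
		t[i] = v
	}
}

func (t *slotTable[T]) each(f func(i uint8, v *T)) {
	for i, v := range t {
		if v != nil {
			f(uint8(i), v)
		}
	}
}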
-	RefCount int +	RefCount uint32  } +// IsFromInstr returns true if this definition is from an instruction.  func (d *SSAValueDefinition) IsFromInstr() bool {  	return d.Instr != nil  } - -func (d *SSAValueDefinition) IsFromBlockParam() bool { -	return d.Instr == nil -} - -func (d *SSAValueDefinition) SSAValue() ssa.Value { -	if d.IsFromBlockParam() { -		return d.BlockParamValue -	} else { -		r, rs := d.Instr.Returns() -		if d.N == 0 { -			return r -		} else { -			return rs[d.N-1] -		} -	} -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go index 72ce44e26..639429a63 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go @@ -554,17 +554,21 @@ func (c *callEngine) cloneStack(l uintptr) (newSP, newFP, newTop uintptr, newSta  	// Copy the existing contents in the previous Go-allocated stack into the new one.  	var prevStackAligned, newStackAligned []byte  	{ +		//nolint:staticcheck  		sh := (*reflect.SliceHeader)(unsafe.Pointer(&prevStackAligned))  		sh.Data = c.stackTop - relSp -		setSliceLimits(sh, relSp, relSp) +		sh.Len = int(relSp) +		sh.Cap = int(relSp)  	}  	newTop = alignedStackTop(newStack)  	{  		newSP = newTop - relSp  		newFP = newTop - relFp +		//nolint:staticcheck  		sh := (*reflect.SliceHeader)(unsafe.Pointer(&newStackAligned))  		sh.Data = newSP -		setSliceLimits(sh, relSp, relSp) +		sh.Len = int(relSp) +		sh.Cap = int(relSp)  	}  	copy(newStackAligned, prevStackAligned)  	return diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go index 42cc21dcd..eebdba034 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go @@ -275,7 +275,7 @@ func (c *Compiler) LowerToSSA() {  		builder.DefineVariable(variable, value, entryBlock)  		c.setWasmLocalVariable(wasm.Index(i), variable)  	} -	c.declareWasmLocals(entryBlock) +	c.declareWasmLocals()  	c.declareNecessaryVariables()  	c.lowerBody(entryBlock) @@ -295,7 +295,7 @@ func (c *Compiler) setWasmLocalVariable(index wasm.Index, variable ssa.Variable)  }  // declareWasmLocals declares the SSA variables for the Wasm locals. -func (c *Compiler) declareWasmLocals(entry ssa.BasicBlock) { +func (c *Compiler) declareWasmLocals() {  	localCount := wasm.Index(len(c.wasmFunctionTyp.Params))  	for i, typ := range c.wasmFunctionLocalTypes {  		st := WasmTypeToSSAType(typ) @@ -543,11 +543,11 @@ func (c *Compiler) initializeCurrentBlockKnownBounds() {  				cb := &c.bounds[i][c.pointers[i]]  				if cb.id != smallestID {  					same = false -					break  				} else {  					if cb.bound < minBound {  						minBound = cb.bound  					} +					c.pointers[i]++  				}  			} @@ -555,14 +555,6 @@ func (c *Compiler) initializeCurrentBlockKnownBounds() {  				// Absolute address cannot be used in the intersection since the value might be only defined in one of the predecessors.  				c.recordKnownSafeBound(smallestID, minBound, ssa.ValueInvalid)  			} - -			// Move pointer(s) for the smallest ID forward (if same, move all). 
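What this loop computes is the intersection of the predecessors' sorted known-bounds lists: an ID survives only if every predecessor knows it, with the smallest of the recorded bounds, and the change folds the cursor advance into the comparison pass (the separate advance loop deleted just below). A standalone sketch of that k-way merge, with a hypothetical knownBound type:

// knownBound is a hypothetical stand-in: each predecessor carries a list of
// (value ID, bound) pairs sorted by ID.
type knownBound struct {
	id    uint32
	bound uint64
}

// intersectBounds keeps only the IDs known in every predecessor, with the
// smallest recorded bound, advancing every cursor that sits on the current
// smallest ID -- the same merge the rewritten loop performs.
func intersectBounds(preds [][]knownBound) (out []knownBound) {
	if len(preds) == 0 {
		return nil
	}
	cursors := make([]int, len(preds))
	for {
		smallest := ^uint32(0)
		for i, lst := range preds {
			if cursors[i] >= len(lst) {
				return out // one list is exhausted; nothing further is common
			}
			if id := lst[cursors[i]].id; id < smallest {
				smallest = id
			}
		}
		same, minBound := true, ^uint64(0)
		for i, lst := range preds {
			cb := lst[cursors[i]]
			if cb.id != smallest {
				same = false
				continue
			}
			if cb.bound < minBound {
				minBound = cb.bound
			}
			cursors[i]++ // advance every cursor currently at the smallest ID
		}
		if same {
			out = append(out, knownBound{id: smallest, bound: minBound})
		}
	}
}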
-			for i := 0; i < preds; i++ { -				cb := &c.bounds[i][c.pointers[i]] -				if cb.id == smallestID { -					c.pointers[i]++ -				} -			}  		}  	}  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go index ff963e605..e73debbd1 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go @@ -1538,8 +1538,7 @@ func (c *Compiler) lowerCurrentOpcode() {  		builder.SetCurrentBlock(elseBlk)  	case wasm.OpcodeBrTable: -		labels := state.tmpForBrTable -		labels = labels[:0] +		labels := state.tmpForBrTable[:0]  		labelCount := c.readI32u()  		for i := 0; i < int(labelCount); i++ {  			labels = append(labels, c.readI32u()) @@ -1557,6 +1556,7 @@ func (c *Compiler) lowerCurrentOpcode() {  		} else {  			c.lowerBrTable(labels, index)  		} +		state.tmpForBrTable = labels // reuse the temporary slice for next use.  		state.unreachable = true  	case wasm.OpcodeNop: @@ -4068,13 +4068,14 @@ func (c *Compiler) lowerBrTable(labels []uint32, index ssa.Value) {  		numArgs = len(f.blockType.Results)  	} -	targets := make([]ssa.BasicBlock, len(labels)) +	varPool := builder.VarLengthPool() +	trampolineBlockIDs := varPool.Allocate(len(labels))  	// We need trampoline blocks since depending on the target block structure, we might end up inserting moves before jumps,  	// which cannot be done with br_table. Instead, we can do such per-block moves in the trampoline blocks.  	// At the linking phase (very end of the backend), we can remove the unnecessary jumps, and therefore no runtime overhead.  	currentBlk := builder.CurrentBlock() -	for i, l := range labels { +	for _, l := range labels {  		// Args are always on the top of the stack. Note that we should not share the args slice  		// among the jump instructions since the args are modified during passes (e.g. redundant phi elimination).  		args := c.nPeekDup(numArgs) @@ -4082,17 +4083,17 @@ func (c *Compiler) lowerBrTable(labels []uint32, index ssa.Value) {  		trampoline := builder.AllocateBasicBlock()  		builder.SetCurrentBlock(trampoline)  		c.insertJumpToBlock(args, targetBlk) -		targets[i] = trampoline +		trampolineBlockIDs = trampolineBlockIDs.Append(builder.VarLengthPool(), ssa.Value(trampoline.ID()))  	}  	builder.SetCurrentBlock(currentBlk)  	// If the target block has no arguments, we can just jump to the target block.  	
brTable := builder.AllocateInstruction() -	brTable.AsBrTable(index, targets) +	brTable.AsBrTable(index, trampolineBlockIDs)  	builder.InsertInstruction(brTable) -	for _, trampoline := range targets { -		builder.Seal(trampoline) +	for _, trampolineID := range trampolineBlockIDs.View() { +		builder.Seal(builder.BasicBlock(ssa.BasicBlockID(trampolineID)))  	}  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id.go index 1296706f5..5b055d127 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id.go @@ -1,5 +1,3 @@ -//go:build go1.21 -  package frontend  import ( diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id_old.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id_old.go deleted file mode 100644 index 2e786a160..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id_old.go +++ /dev/null @@ -1,17 +0,0 @@ -//go:build !go1.21 - -// TODO: delete after the floor Go version is 1.21 - -package frontend - -import ( -	"sort" - -	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" -) - -func sortSSAValueIDs(IDs []ssa.ValueID) { -	sort.SliceStable(IDs, func(i, j int) bool { -		return int(IDs[i]) < int(IDs[j]) -	}) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/hostmodule.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/hostmodule.go index 8da7347a9..800a5d2a8 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/hostmodule.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/hostmodule.go @@ -16,6 +16,7 @@ func buildHostModuleOpaque(m *wasm.Module, listeners []experimental.FunctionList  	binary.LittleEndian.PutUint64(ret[0:], uint64(uintptr(unsafe.Pointer(m))))  	if len(listeners) > 0 { +		//nolint:staticcheck  		sliceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&listeners))  		binary.LittleEndian.PutUint64(ret[8:], uint64(sliceHeader.Data))  		binary.LittleEndian.PutUint64(ret[16:], uint64(sliceHeader.Len)) @@ -33,6 +34,7 @@ func buildHostModuleOpaque(m *wasm.Module, listeners []experimental.FunctionList  func hostModuleFromOpaque(opaqueBegin uintptr) *wasm.Module {  	var opaqueViewOverSlice []byte +	//nolint:staticcheck  	sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverSlice))  	sh.Data = opaqueBegin  	sh.Len = 32 @@ -42,6 +44,7 @@ func hostModuleFromOpaque(opaqueBegin uintptr) *wasm.Module {  func hostModuleListenersSliceFromOpaque(opaqueBegin uintptr) []experimental.FunctionListener {  	var opaqueViewOverSlice []byte +	//nolint:staticcheck  	sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverSlice))  	sh.Data = opaqueBegin  	sh.Len = 32 @@ -51,9 +54,11 @@ func hostModuleListenersSliceFromOpaque(opaqueBegin uintptr) []experimental.Func  	l := binary.LittleEndian.Uint64(opaqueViewOverSlice[16:])  	c := binary.LittleEndian.Uint64(opaqueViewOverSlice[24:])  	var ret []experimental.FunctionListener +	//nolint:staticcheck  	sh = (*reflect.SliceHeader)(unsafe.Pointer(&ret))  	sh.Data = uintptr(b) -	setSliceLimits(sh, uintptr(l), uintptr(c)) +	sh.Len = int(l) +	sh.Cap = int(c)  	return ret  } @@ -62,6 +67,7 @@ func hostModuleGoFuncFromOpaque[T any](index int, opaqueBegin uintptr) T {  	ptr := opaqueBegin + offset  	var opaqueViewOverFunction []byte +	
//nolint:staticcheck  	sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverFunction))  	sh.Data = ptr  	sh.Len = 16 diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect.go deleted file mode 100644 index 6a03fc65c..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build !tinygo - -package wazevo - -import "reflect" - -// setSliceLimits sets both Cap and Len for the given reflected slice. -func setSliceLimits(s *reflect.SliceHeader, l, c uintptr) { -	s.Len = int(l) -	s.Cap = int(c) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect_tinygo.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect_tinygo.go deleted file mode 100644 index eda3e706a..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect_tinygo.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build tinygo - -package wazevo - -import "reflect" - -// setSliceLimits sets both Cap and Len for the given reflected slice. -func setSliceLimits(s *reflect.SliceHeader, l, c uintptr) { -	s.Len = l -	s.Cap = c -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go index 39627b989..cf7f14d3b 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go @@ -34,9 +34,6 @@ type BasicBlock interface {  	// The returned Value is the definition of the param in this block.  	Param(i int) Value -	// InsertInstruction inserts an instruction that implements Value into the tail of this block. -	InsertInstruction(raw *Instruction) -  	// Root returns the root instruction of this block.  	Root() *Instruction @@ -81,7 +78,7 @@ type (  		rootInstr, currentInstr *Instruction  		// params are Values that represent parameters to a basicBlock.  		// Each parameter can be considered as an output of PHI instruction in traditional SSA. -		params  []Value +		params  Values  		preds   []basicBlockPredecessorInfo  		success []*basicBlock  		// singlePred is the alias to preds[0] for fast lookup, and only set after Seal is called. @@ -179,23 +176,23 @@ func (bb *basicBlock) ReturnBlock() bool {  // AddParam implements BasicBlock.AddParam.  func (bb *basicBlock) AddParam(b Builder, typ Type) Value {  	paramValue := b.allocateValue(typ) -	bb.params = append(bb.params, paramValue) +	bb.params = bb.params.Append(&b.(*builder).varLengthPool, paramValue)  	return paramValue  }  // addParamOn adds a parameter to this block whose value is already allocated. -func (bb *basicBlock) addParamOn(value Value) { -	bb.params = append(bb.params, value) +func (bb *basicBlock) addParamOn(b *builder, value Value) { +	bb.params = bb.params.Append(&b.varLengthPool, value)  }  // Params implements BasicBlock.Params.  func (bb *basicBlock) Params() int { -	return len(bb.params) +	return len(bb.params.View())  }  // Param implements BasicBlock.Param.  func (bb *basicBlock) Param(i int) Value { -	return bb.params[i] +	return bb.params.View()[i]  }  // Valid implements BasicBlock.Valid. @@ -208,8 +205,8 @@ func (bb *basicBlock) Sealed() bool {  	return bb.sealed  } -// InsertInstruction implements BasicBlock.InsertInstruction. 
-func (bb *basicBlock) InsertInstruction(next *Instruction) { +// insertInstruction implements BasicBlock.InsertInstruction. +func (bb *basicBlock) insertInstruction(b *builder, next *Instruction) {  	current := bb.currentInstr  	if current != nil {  		current.next = next @@ -221,12 +218,12 @@ func (bb *basicBlock) InsertInstruction(next *Instruction) {  	switch next.opcode {  	case OpcodeJump, OpcodeBrz, OpcodeBrnz: -		target := next.blk.(*basicBlock) -		target.addPred(bb, next) +		target := BasicBlockID(next.rValue) +		b.basicBlock(target).addPred(bb, next)  	case OpcodeBrTable: -		for _, _target := range next.targets { -			target := _target.(*basicBlock) -			target.addPred(bb, next) +		for _, _target := range next.rValues.View() { +			target := BasicBlockID(_target) +			b.basicBlock(target).addPred(bb, next)  		}  	}  } @@ -268,7 +265,7 @@ func (bb *basicBlock) Tail() *Instruction {  // reset resets the basicBlock to its initial state so that it can be reused for another function.  func resetBasicBlock(bb *basicBlock) { -	bb.params = bb.params[:0] +	bb.params = ValuesNil  	bb.rootInstr, bb.currentInstr = nil, nil  	bb.preds = bb.preds[:0]  	bb.success = bb.success[:0] @@ -310,8 +307,8 @@ func (bb *basicBlock) addPred(blk BasicBlock, branch *Instruction) {  // formatHeader returns the string representation of the header of the basicBlock.  func (bb *basicBlock) formatHeader(b Builder) string { -	ps := make([]string, len(bb.params)) -	for i, p := range bb.params { +	ps := make([]string, len(bb.params.View())) +	for i, p := range bb.params.View() {  		ps[i] = p.formatWithType(b)  	} @@ -339,7 +336,9 @@ func (bb *basicBlock) validate(b *builder) {  	if len(bb.preds) > 0 {  		for _, pred := range bb.preds {  			if pred.branch.opcode != OpcodeBrTable { -				if target := pred.branch.blk; target != bb { +				blockID := int(pred.branch.rValue) +				target := b.basicBlocksPool.View(blockID) +				if target != bb {  					panic(fmt.Sprintf("BUG: '%s' is not branch to %s, but to %s",  						pred.branch.Format(b), bb.Name(), target.Name()))  				} @@ -349,14 +348,14 @@ func (bb *basicBlock) validate(b *builder) {  			if bb.ReturnBlock() {  				exp = len(b.currentSignature.Results)  			} else { -				exp = len(bb.params) +				exp = len(bb.params.View())  			}  			if len(pred.branch.vs.View()) != exp {  				panic(fmt.Sprintf(  					"BUG: len(argument at %s) != len(params at %s): %d != %d: %s",  					pred.blk.Name(), bb.Name(), -					len(pred.branch.vs.View()), len(bb.params), pred.branch.Format(b), +					len(pred.branch.vs.View()), len(bb.params.View()), pred.branch.Format(b),  				))  			} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort.go index e1471edc3..fb98298f7 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort.go @@ -1,5 +1,3 @@ -//go:build go1.21 -  package ssa  import ( diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort_old.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort_old.go deleted file mode 100644 index 9dc881dae..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort_old.go +++ /dev/null @@ -1,24 +0,0 @@ -//go:build !go1.21 - -// TODO: delete after the floor Go version is 1.21 - -package ssa - -import "sort" 
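These !go1.21 fallbacks (sort_id_old.go above and basic_block_sort_old.go here) can be deleted presumably because wazero's minimum Go version now includes the standard slices package. Assuming the retained sort_id.go keeps the same signature, the fallback's job reduces to an ordered sort, since ssa.ValueID is a plain integer and stability adds nothing for equal integers:

package frontend

import (
	"slices"

	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

// sortSSAValueIDs matches the deleted fallback's behavior: ValueID is an
// ordered integer type, so a plain sort is equivalent to the stable one.
func sortSSAValueIDs(ids []ssa.ValueID) {
	slices.Sort(ids)
}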
- -func sortBlocks(blocks []*basicBlock) { -	sort.SliceStable(blocks, func(i, j int) bool { -		iBlk, jBlk := blocks[i], blocks[j] -		if jBlk.ReturnBlock() { -			return true -		} -		if iBlk.ReturnBlock() { -			return false -		} -		iRoot, jRoot := iBlk.rootInstr, jBlk.rootInstr -		if iRoot == nil || jRoot == nil { // For testing. -			return true -		} -		return iBlk.rootInstr.id < jBlk.rootInstr.id -	}) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go index 0b700c4b1..43dd7d292 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go @@ -94,9 +94,9 @@ type Builder interface {  	// Returns nil if there's no unseen BasicBlock.  	BlockIteratorNext() BasicBlock -	// ValueRefCounts returns the map of ValueID to its reference count. -	// The returned slice must not be modified. -	ValueRefCounts() []int +	// ValuesInfo returns the data per Value used to lower the SSA in backend. +	// This is indexed by ValueID. +	ValuesInfo() []ValueInfo  	// BlockIteratorReversePostOrderBegin is almost the same as BlockIteratorBegin except it returns the BasicBlock in the reverse post-order.  	// This is available after RunPasses is run. @@ -129,20 +129,24 @@ type Builder interface {  	// InsertZeroValue inserts a zero value constant instruction of the given type.  	InsertZeroValue(t Type) + +	// BasicBlock returns the BasicBlock of the given ID. +	BasicBlock(id BasicBlockID) BasicBlock + +	// InstructionOfValue returns the Instruction that produces the given Value or nil if the Value is not produced by any Instruction. +	InstructionOfValue(v Value) *Instruction  }  // NewBuilder returns a new Builder implementation.  func NewBuilder() Builder {  	return &builder{ -		instructionsPool:               wazevoapi.NewPool[Instruction](resetInstruction), -		basicBlocksPool:                wazevoapi.NewPool[basicBlock](resetBasicBlock), -		varLengthBasicBlockPool:        wazevoapi.NewVarLengthPool[BasicBlock](), -		varLengthPool:                  wazevoapi.NewVarLengthPool[Value](), -		valueAnnotations:               make(map[ValueID]string), -		signatures:                     make(map[SignatureID]*Signature), -		valueIDAliases:                 make(map[ValueID]Value), -		redundantParameterIndexToValue: make(map[int]Value), -		returnBlk:                      &basicBlock{id: basicBlockIDReturnBlock}, +		instructionsPool:        wazevoapi.NewPool[Instruction](resetInstruction), +		basicBlocksPool:         wazevoapi.NewPool[basicBlock](resetBasicBlock), +		varLengthBasicBlockPool: wazevoapi.NewVarLengthPool[BasicBlock](), +		varLengthPool:           wazevoapi.NewVarLengthPool[Value](), +		valueAnnotations:        make(map[ValueID]string), +		signatures:              make(map[SignatureID]*Signature), +		returnBlk:               &basicBlock{id: basicBlockIDReturnBlock},  	}  } @@ -159,19 +163,16 @@ type builder struct {  	currentBB                     *basicBlock  	returnBlk                     *basicBlock -	// variables track the types for Variable with the index regarded Variable. -	variables []Type  	// nextValueID is used by builder.AllocateValue.  	nextValueID ValueID  	// nextVariable is used by builder.AllocateVariable.  	nextVariable Variable -	valueIDAliases   map[ValueID]Value +	// valueAnnotations contains the annotations for each Value, only used for debugging.  	
valueAnnotations map[ValueID]string -	// valueRefCounts is used to lower the SSA in backend, and will be calculated -	// by the last SSA-level optimization pass. -	valueRefCounts []int +	// valuesInfo contains the data per Value used to lower the SSA in backend. This is indexed by ValueID. +	valuesInfo []ValueInfo  	// dominators stores the immediate dominator of each BasicBlock.  	// The index is blockID of the BasicBlock. @@ -184,12 +185,10 @@ type builder struct {  	loopNestingForestRoots []BasicBlock  	// The followings are used for optimization passes/deterministic compilation. -	instStack                      []*Instruction -	valueIDToInstruction           []*Instruction -	blkStack                       []*basicBlock -	blkStack2                      []*basicBlock -	ints                           []int -	redundantParameterIndexToValue map[int]Value +	instStack       []*Instruction +	blkStack        []*basicBlock +	blkStack2       []*basicBlock +	redundantParams []redundantParam  	// blockIterCur is used to implement blockIteratorBegin and blockIteratorNext.  	blockIterCur int @@ -207,6 +206,34 @@ type builder struct {  	zeros [typeEnd]Value  } +// ValueInfo contains the data per Value used to lower the SSA in backend. +type ValueInfo struct { +	// RefCount is the reference count of the Value. +	RefCount uint32 +	alias    Value +} + +// redundantParam is a pair of the index of the redundant parameter and the Value. +// This is used to eliminate the redundant parameters in the optimization pass. +type redundantParam struct { +	// index is the index of the redundant parameter in the basicBlock. +	index int +	// uniqueValue is the Value which is passed to the redundant parameter. +	uniqueValue Value +} + +// BasicBlock implements Builder.BasicBlock. +func (b *builder) BasicBlock(id BasicBlockID) BasicBlock { +	return b.basicBlock(id) +} + +func (b *builder) basicBlock(id BasicBlockID) *basicBlock { +	if id == basicBlockIDReturnBlock { +		return b.returnBlk +	} +	return b.basicBlocksPool.View(int(id)) +} +  // InsertZeroValue implements Builder.InsertZeroValue.  func (b *builder) InsertZeroValue(t Type) {  	if b.zeros[t].Valid() { @@ -256,7 +283,7 @@ func (b *builder) Init(s *Signature) {  		sig.used = false  	} -	b.ints = b.ints[:0] +	b.redundantParams = b.redundantParams[:0]  	b.blkStack = b.blkStack[:0]  	b.blkStack2 = b.blkStack2[:0]  	b.dominators = b.dominators[:0] @@ -265,17 +292,11 @@ func (b *builder) Init(s *Signature) {  	for v := ValueID(0); v < b.nextValueID; v++ {  		delete(b.valueAnnotations, v) -		delete(b.valueIDAliases, v) -		b.valueRefCounts[v] = 0 -		b.valueIDToInstruction[v] = nil +		b.valuesInfo[v] = ValueInfo{alias: ValueInvalid}  	}  	b.nextValueID = 0  	b.reversePostOrderedBasicBlocks = b.reversePostOrderedBasicBlocks[:0]  	b.doneBlockLayout = false -	for i := range b.valueRefCounts { -		b.valueRefCounts[i] = 0 -	} -  	b.currentSourceOffset = sourceOffsetUnknown  } @@ -355,7 +376,7 @@ func (b *builder) Idom(blk BasicBlock) BasicBlock {  // InsertInstruction implements Builder.InsertInstruction.  
func (b *builder) InsertInstruction(instr *Instruction) { -	b.currentBB.InsertInstruction(instr) +	b.currentBB.insertInstruction(b, instr)  	if l := b.currentSourceOffset; l.Valid() {  		// Emit the source offset info only when the instruction has side effect because @@ -377,7 +398,7 @@ func (b *builder) InsertInstruction(instr *Instruction) {  	}  	r1 := b.allocateValue(t1) -	instr.rValue = r1 +	instr.rValue = r1.setInstructionID(instr.id)  	tsl := len(ts)  	if tsl == 0 { @@ -386,20 +407,14 @@ func (b *builder) InsertInstruction(instr *Instruction) {  	rValues := b.varLengthPool.Allocate(tsl)  	for i := 0; i < tsl; i++ { -		rValues = rValues.Append(&b.varLengthPool, b.allocateValue(ts[i])) +		rn := b.allocateValue(ts[i]) +		rValues = rValues.Append(&b.varLengthPool, rn.setInstructionID(instr.id))  	}  	instr.rValues = rValues  }  // DefineVariable implements Builder.DefineVariable.  func (b *builder) DefineVariable(variable Variable, value Value, block BasicBlock) { -	if b.variables[variable].invalid() { -		panic("BUG: trying to define variable " + variable.String() + " but is not declared yet") -	} - -	if b.variables[variable] != value.Type() { -		panic(fmt.Sprintf("BUG: inconsistent type for variable %d: expected %s but got %s", variable, b.variables[variable], value.Type())) -	}  	bb := block.(*basicBlock)  	bb.lastDefinitions[variable] = value  } @@ -426,20 +441,9 @@ func (b *builder) EntryBlock() BasicBlock {  // DeclareVariable implements Builder.DeclareVariable.  func (b *builder) DeclareVariable(typ Type) Variable { -	v := b.allocateVariable() -	iv := int(v) -	if l := len(b.variables); l <= iv { -		b.variables = append(b.variables, make([]Type, 2*(l+1))...) -	} -	b.variables[v] = typ -	return v -} - -// allocateVariable allocates a new variable. -func (b *builder) allocateVariable() (ret Variable) { -	ret = b.nextVariable +	v := b.nextVariable  	b.nextVariable++ -	return +	return v.setType(typ)  }  // allocateValue implements Builder.AllocateValue. @@ -475,8 +479,7 @@ func (b *builder) findValueInLinearPath(variable Variable, blk *basicBlock) Valu  // MustFindValue implements Builder.MustFindValue.  func (b *builder) MustFindValue(variable Variable) Value { -	typ := b.definedVariableType(variable) -	return b.findValue(typ, variable, b.currentBB) +	return b.findValue(variable.getType(), variable, b.currentBB)  }  // findValue recursively tries to find the latest definition of a `variable`. The algorithm is described in @@ -504,7 +507,7 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value  		return value  	} else if blk.EntryBlock() {  		// If this is the entry block, we reach the uninitialized variable which has zero value. -		return b.zeros[b.definedVariableType(variable)] +		return b.zeros[variable.getType()]  	}  	if pred := blk.singlePred; pred != nil { @@ -536,14 +539,13 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value  	if uniqueValue != ValueInvalid {  		// If all the predecessors have the same definition, we can use that value. -		b.DefineVariable(variable, uniqueValue, blk)  		b.alias(tmpValue, uniqueValue)  		return uniqueValue  	} else {  		// Otherwise, add the tmpValue to this block as a parameter which may or may not be redundant, but  		// later we eliminate trivial params in an optimization pass. This must be done before finding the  		// definitions in the predecessors so that we can break the cycle. 
-		blk.addParamOn(tmpValue) +		blk.addParamOn(b, tmpValue)  		// After the new param is added, we have to manipulate the original branching instructions  		// in predecessors so that they would pass the definition of `variable` as the argument to  		// the newly added PHI. @@ -566,8 +568,8 @@ func (b *builder) Seal(raw BasicBlock) {  	for _, v := range blk.unknownValues {  		variable, phiValue := v.variable, v.value -		typ := b.definedVariableType(variable) -		blk.addParamOn(phiValue) +		typ := variable.getType() +		blk.addParamOn(b, phiValue)  		for i := range blk.preds {  			pred := &blk.preds[i]  			predValue := b.findValue(typ, variable, pred.blk) @@ -579,15 +581,6 @@ func (b *builder) Seal(raw BasicBlock) {  	}  } -// definedVariableType returns the type of the given variable. If the variable is not defined yet, it panics. -func (b *builder) definedVariableType(variable Variable) Type { -	typ := b.variables[variable] -	if typ.invalid() { -		panic(fmt.Sprintf("%s is not defined yet", variable)) -	} -	return typ -} -  // Format implements Builder.Format.  func (b *builder) Format() string {  	str := strings.Builder{} @@ -689,15 +682,24 @@ func (b *builder) blockIteratorReversePostOrderNext() *basicBlock {  	}  } -// ValueRefCounts implements Builder.ValueRefCounts. -func (b *builder) ValueRefCounts() []int { -	return b.valueRefCounts +// ValuesInfo implements Builder.ValuesInfo. +func (b *builder) ValuesInfo() []ValueInfo { +	return b.valuesInfo  }  // alias records the alias of the given values. The alias(es) will be  // eliminated in the optimization pass via resolveArgumentAlias.  func (b *builder) alias(dst, src Value) { -	b.valueIDAliases[dst.ID()] = src +	did := int(dst.ID()) +	if did >= len(b.valuesInfo) { +		l := did + 1 - len(b.valuesInfo) +		b.valuesInfo = append(b.valuesInfo, make([]ValueInfo, l)...) +		view := b.valuesInfo[len(b.valuesInfo)-l:] +		for i := range view { +			view[i].alias = ValueInvalid +		} +	} +	b.valuesInfo[did].alias = src  }  // resolveArgumentAlias resolves the alias of the arguments of the given instruction. @@ -722,10 +724,13 @@ func (b *builder) resolveArgumentAlias(instr *Instruction) {  // resolveAlias resolves the alias of the given value.  func (b *builder) resolveAlias(v Value) Value { +	info := b.valuesInfo +	l := ValueID(len(info))  	// Some aliases are chained, so we need to resolve them recursively.  	for { -		if src, ok := b.valueIDAliases[v.ID()]; ok { -			v = src +		vid := v.ID() +		if vid < l && info[vid].alias.Valid() { +			v = info[vid].alias  		} else {  			break  		} @@ -773,3 +778,13 @@ func (b *builder) LoopNestingForestRoots() []BasicBlock {  func (b *builder) LowestCommonAncestor(blk1, blk2 BasicBlock) BasicBlock {  	return b.sparseTree.findLCA(blk1.ID(), blk2.ID())  } + +// InstructionOfValue returns the instruction that produces the given Value, or nil +// if the Value is not produced by any instruction. 
+func (b *builder) InstructionOfValue(v Value) *Instruction { +	instrID := v.instructionID() +	if instrID <= 0 { +		return nil +	} +	return b.instructionsPool.View(instrID - 1) +} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go index 3e3482efc..9a3d1da6e 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go @@ -25,11 +25,13 @@ type Instruction struct {  	v3         Value  	vs         Values  	typ        Type -	blk        BasicBlock -	targets    []BasicBlock  	prev, next *Instruction -	rValue         Value +	// rValue is the (first) return value of this instruction. +	// For branching instructions except for OpcodeBrTable, they hold BlockID to jump cast to Value. +	rValue Value +	// rValues are the rest of the return values of this instruction. +	// For OpcodeBrTable, it holds the list of BlockID to jump cast to Value.  	rValues        Values  	gid            InstructionGroupID  	sourceOffset   SourceOffset @@ -105,6 +107,9 @@ type InstructionGroupID uint32  // Returns Value(s) produced by this instruction if any.  // The `first` is the first return value, and `rest` is the rest of the values.  func (i *Instruction) Returns() (first Value, rest []Value) { +	if i.IsBranching() { +		return ValueInvalid, nil +	}  	return i.rValue, i.rValues.View()  } @@ -2077,7 +2082,7 @@ func (i *Instruction) InvertBrx() {  }  // BranchData returns the branch data for this instruction necessary for backends. -func (i *Instruction) BranchData() (condVal Value, blockArgs []Value, target BasicBlock) { +func (i *Instruction) BranchData() (condVal Value, blockArgs []Value, target BasicBlockID) {  	switch i.opcode {  	case OpcodeJump:  		condVal = ValueInvalid @@ -2087,17 +2092,17 @@ func (i *Instruction) BranchData() (condVal Value, blockArgs []Value, target Bas  		panic("BUG")  	}  	blockArgs = i.vs.View() -	target = i.blk +	target = BasicBlockID(i.rValue)  	return  }  // BrTableData returns the branch table data for this instruction necessary for backends. -func (i *Instruction) BrTableData() (index Value, targets []BasicBlock) { +func (i *Instruction) BrTableData() (index Value, targets Values) {  	if i.opcode != OpcodeBrTable {  		panic("BUG: BrTableData only available for OpcodeBrTable")  	}  	index = i.v -	targets = i.targets +	targets = i.rValues  	return  } @@ -2105,7 +2110,7 @@ func (i *Instruction) BrTableData() (index Value, targets []BasicBlock) {  func (i *Instruction) AsJump(vs Values, target BasicBlock) *Instruction {  	i.opcode = OpcodeJump  	i.vs = vs -	i.blk = target +	i.rValue = Value(target.ID())  	return i  } @@ -2130,7 +2135,7 @@ func (i *Instruction) AsBrz(v Value, args Values, target BasicBlock) {  	i.opcode = OpcodeBrz  	i.v = v  	i.vs = args -	i.blk = target +	i.rValue = Value(target.ID())  }  // AsBrnz initializes this instruction as a branch-if-not-zero instruction with OpcodeBrnz. @@ -2138,15 +2143,16 @@ func (i *Instruction) AsBrnz(v Value, args Values, target BasicBlock) *Instructi  	i.opcode = OpcodeBrnz  	i.v = v  	i.vs = args -	i.blk = target +	i.rValue = Value(target.ID())  	return i  }  // AsBrTable initializes this instruction as a branch-table instruction with OpcodeBrTable. -func (i *Instruction) AsBrTable(index Value, targets []BasicBlock) { +// targets is a list of basic block IDs cast to Values. 
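InstructionOfValue can be an O(1) pool lookup because the builder now stamps the producing instruction's ID into each Value when it is allocated (setInstructionID above), replacing the old valueIDToInstruction side table; branch instructions reuse rValue to hold the target BasicBlockID cast to Value. A rough sketch of the packing, following the bit layout documented in vs.go later in this diff (low 32 bits: value ID; bits 32-59: instruction ID plus one, with zero meaning "no producing instruction"; top 4 bits: type):

// value mimics the packing described above; it is illustrative, not the
// real ssa.Value implementation.
type value uint64

func (v value) id() uint32 { return uint32(v) }

func (v value) instructionID() int { return int((v >> 32) & 0x0fffffff) }

func (v value) setInstructionID(instrID int) value {
	return v&^(value(0x0fffffff)<<32) | value(instrID+1)<<32
}

func (v value) typ() uint8 { return uint8(v >> 60) }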
+func (i *Instruction) AsBrTable(index Value, targets Values) {  	i.opcode = OpcodeBrTable  	i.v = index -	i.targets = targets +	i.rValues = targets  }  // AsCall initializes this instruction as a call instruction with OpcodeCall. @@ -2531,7 +2537,8 @@ func (i *Instruction) Format(b Builder) string {  		if i.IsFallthroughJump() {  			vs[0] = " fallthrough"  		} else { -			vs[0] = " " + i.blk.(*basicBlock).Name() +			blockId := BasicBlockID(i.rValue) +			vs[0] = " " + b.BasicBlock(blockId).Name()  		}  		for idx := range view {  			vs[idx+1] = view[idx].Format(b) @@ -2542,7 +2549,8 @@ func (i *Instruction) Format(b Builder) string {  		view := i.vs.View()  		vs := make([]string, len(view)+2)  		vs[0] = " " + i.v.Format(b) -		vs[1] = i.blk.(*basicBlock).Name() +		blockId := BasicBlockID(i.rValue) +		vs[1] = b.BasicBlock(blockId).Name()  		for idx := range view {  			vs[idx+2] = view[idx].Format(b)  		} @@ -2551,8 +2559,8 @@ func (i *Instruction) Format(b Builder) string {  		// `BrTable index, [label1, label2, ... labelN]`  		instSuffix = fmt.Sprintf(" %s", i.v.Format(b))  		instSuffix += ", [" -		for i, target := range i.targets { -			blk := target.(*basicBlock) +		for i, target := range i.rValues.View() { +			blk := b.BasicBlock(BasicBlockID(target))  			if i == 0 {  				instSuffix += blk.Name()  			} else { @@ -2621,11 +2629,12 @@ func (i *Instruction) Format(b Builder) string {  	instr := i.opcode.String() + instSuffix  	var rvs []string -	if rv := i.rValue; rv.Valid() { -		rvs = append(rvs, rv.formatWithType(b)) +	r1, rs := i.Returns() +	if r1.Valid() { +		rvs = append(rvs, r1.formatWithType(b))  	} -	for _, v := range i.rValues.View() { +	for _, v := range rs {  		rvs = append(rvs, v.formatWithType(b))  	} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go index 89ec34b7e..b9763791d 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go @@ -112,7 +112,7 @@ func passDeadBlockEliminationOpt(b *builder) {  // This requires the reverse post-order traversal to be calculated before calling this function,  // hence passCalculateImmediateDominators must be called before this.  func passRedundantPhiEliminationOpt(b *builder) { -	redundantParameterIndexes := b.ints[:0] // reuse the slice from previous iterations. +	redundantParams := b.redundantParams[:0] // reuse the slice from previous iterations.  	// TODO: this might be costly for large programs, but at least, as far as I did the experiment, it's almost the  	//  same as the single iteration version in terms of the overall compilation time. That *might be* mostly thanks to the fact @@ -128,10 +128,11 @@ func passRedundantPhiEliminationOpt(b *builder) {  		_ = b.blockIteratorReversePostOrderBegin() // skip entry block!  		// Below, we intentionally use the named iteration variable name, as this comes with inevitable nested for loops!  		
for blk := b.blockIteratorReversePostOrderNext(); blk != nil; blk = b.blockIteratorReversePostOrderNext() { -			paramNum := len(blk.params) +			params := blk.params.View() +			paramNum := len(params)  			for paramIndex := 0; paramIndex < paramNum; paramIndex++ { -				phiValue := blk.params[paramIndex] +				phiValue := params[paramIndex]  				redundant := true  				nonSelfReferencingValue := ValueInvalid @@ -162,55 +163,58 @@ func passRedundantPhiEliminationOpt(b *builder) {  				}  				if redundant { -					b.redundantParameterIndexToValue[paramIndex] = nonSelfReferencingValue -					redundantParameterIndexes = append(redundantParameterIndexes, paramIndex) +					redundantParams = append(redundantParams, redundantParam{ +						index: paramIndex, uniqueValue: nonSelfReferencingValue, +					})  				}  			} -			if len(b.redundantParameterIndexToValue) == 0 { +			if len(redundantParams) == 0 {  				continue  			}  			changed = true  			// Remove the redundant PHIs from the argument list of branching instructions.  			for predIndex := range blk.preds { -				var cur int +				redundantParamsCur, predParamCur := 0, 0  				predBlk := blk.preds[predIndex]  				branchInst := predBlk.branch  				view := branchInst.vs.View()  				for argIndex, value := range view { -					if _, ok := b.redundantParameterIndexToValue[argIndex]; !ok { -						view[cur] = value -						cur++ +					if len(redundantParams) == redundantParamsCur || +						redundantParams[redundantParamsCur].index != argIndex { +						view[predParamCur] = value +						predParamCur++ +					} else { +						redundantParamsCur++  					}  				} -				branchInst.vs.Cut(cur) +				branchInst.vs.Cut(predParamCur)  			}  			// Still need to have the definition of the value of the PHI (previously as the parameter). -			for _, redundantParamIndex := range redundantParameterIndexes { -				phiValue := blk.params[redundantParamIndex] -				onlyValue := b.redundantParameterIndexToValue[redundantParamIndex] +			for i := range redundantParams { +				redundantValue := &redundantParams[i] +				phiValue := params[redundantValue.index]  				// Create an alias in this block from the only phi argument to the phi value. -				b.alias(phiValue, onlyValue) +				b.alias(phiValue, redundantValue.uniqueValue)  			}  			// Finally, Remove the param from the blk. -			var cur int +			paramsCur, redundantParamsCur := 0, 0  			for paramIndex := 0; paramIndex < paramNum; paramIndex++ { -				param := blk.params[paramIndex] -				if _, ok := b.redundantParameterIndexToValue[paramIndex]; !ok { -					blk.params[cur] = param -					cur++ +				param := params[paramIndex] +				if len(redundantParams) == redundantParamsCur || redundantParams[redundantParamsCur].index != paramIndex { +					params[paramsCur] = param +					paramsCur++ +				} else { +					redundantParamsCur++  				}  			} -			blk.params = blk.params[:cur] +			blk.params.Cut(paramsCur)  			// Clears the map for the next iteration. -			for _, paramIndex := range redundantParameterIndexes { -				delete(b.redundantParameterIndexToValue, paramIndex) -			} -			redundantParameterIndexes = redundantParameterIndexes[:0] +			redundantParams = redundantParams[:0]  		}  		if !changed { @@ -219,7 +223,7 @@ func passRedundantPhiEliminationOpt(b *builder) {  	}  	// Reuse the slice for the future passes. 
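The rewritten pass drops the map[int]Value: redundant parameter indices are gathered in ascending order, so both the predecessors' branch-argument lists and the block's own parameter list can be filtered in place with two cursors. The same idea in isolation:

// removeAtSortedIndexes drops the positions listed in sorted (ascending)
// from xs in place and returns the shortened slice.
func removeAtSortedIndexes[T any](xs []T, sorted []int) []T {
	out, s := 0, 0
	for i, x := range xs {
		if s < len(sorted) && sorted[s] == i {
			s++ // this position was marked redundant; drop it
			continue
		}
		xs[out] = x
		out++
	}
	return xs[:out]
}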
-	b.ints = redundantParameterIndexes +	b.redundantParams = redundantParams  }  // passDeadCodeEliminationOpt traverses all the instructions, and calculates the reference count of each Value, and @@ -231,11 +235,13 @@ func passRedundantPhiEliminationOpt(b *builder) {  // TODO: the algorithm here might not be efficient. Get back to this later.  func passDeadCodeEliminationOpt(b *builder) {  	nvid := int(b.nextValueID) -	if nvid >= len(b.valueRefCounts) { -		b.valueRefCounts = append(b.valueRefCounts, make([]int, nvid-len(b.valueRefCounts)+1)...) -	} -	if nvid >= len(b.valueIDToInstruction) { -		b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, nvid-len(b.valueIDToInstruction)+1)...) +	if nvid >= len(b.valuesInfo) { +		l := nvid - len(b.valuesInfo) + 1 +		b.valuesInfo = append(b.valuesInfo, make([]ValueInfo, l)...) +		view := b.valuesInfo[len(b.valuesInfo)-l:] +		for i := range view { +			view[i].alias = ValueInvalid +		}  	}  	// First, we gather all the instructions with side effects. @@ -255,14 +261,6 @@ func passDeadCodeEliminationOpt(b *builder) {  				// The strict side effect should create different instruction groups.  				gid++  			} - -			r1, rs := cur.Returns() -			if r1.Valid() { -				b.valueIDToInstruction[r1.ID()] = cur -			} -			for _, r := range rs { -				b.valueIDToInstruction[r.ID()] = cur -			}  		}  	} @@ -283,28 +281,28 @@ func passDeadCodeEliminationOpt(b *builder) {  		v1, v2, v3, vs := live.Args()  		if v1.Valid() { -			producingInst := b.valueIDToInstruction[v1.ID()] +			producingInst := b.InstructionOfValue(v1)  			if producingInst != nil {  				liveInstructions = append(liveInstructions, producingInst)  			}  		}  		if v2.Valid() { -			producingInst := b.valueIDToInstruction[v2.ID()] +			producingInst := b.InstructionOfValue(v2)  			if producingInst != nil {  				liveInstructions = append(liveInstructions, producingInst)  			}  		}  		if v3.Valid() { -			producingInst := b.valueIDToInstruction[v3.ID()] +			producingInst := b.InstructionOfValue(v3)  			if producingInst != nil {  				liveInstructions = append(liveInstructions, producingInst)  			}  		}  		for _, v := range vs { -			producingInst := b.valueIDToInstruction[v.ID()] +			producingInst := b.InstructionOfValue(v)  			if producingInst != nil {  				liveInstructions = append(liveInstructions, producingInst)  			} @@ -352,34 +350,19 @@ func (b *builder) incRefCount(id ValueID, from *Instruction) {  	if wazevoapi.SSALoggingEnabled {  		fmt.Printf("v%d referenced from %v\n", id, from.Format(b))  	} -	b.valueRefCounts[id]++ +	info := &b.valuesInfo[id] +	info.RefCount++  }  // passNopInstElimination eliminates the instructions which is essentially a no-op.  func passNopInstElimination(b *builder) { -	if int(b.nextValueID) >= len(b.valueIDToInstruction) { -		b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, int(b.nextValueID)-len(b.valueIDToInstruction)+1)...) -	} - -	for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { -		for cur := blk.rootInstr; cur != nil; cur = cur.next { -			r1, rs := cur.Returns() -			if r1.Valid() { -				b.valueIDToInstruction[r1.ID()] = cur -			} -			for _, r := range rs { -				b.valueIDToInstruction[r.ID()] = cur -			} -		} -	} -  	for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {  		for cur := blk.rootInstr; cur != nil; cur = cur.next {  			switch cur.Opcode() {  			// TODO: add more logics here.  			
case OpcodeIshl, OpcodeSshr, OpcodeUshr:  				x, amount := cur.Arg2() -				definingInst := b.valueIDToInstruction[amount.ID()] +				definingInst := b.InstructionOfValue(amount)  				if definingInst == nil {  					// If there's no defining instruction, that means the amount is coming from the parameter.  					continue diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go index 584b5eade..0118e8b2e 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go @@ -33,7 +33,7 @@ func passLayoutBlocks(b *builder) {  		}  		nonSplitBlocks = append(nonSplitBlocks, blk)  		if i != len(b.reversePostOrderedBasicBlocks)-1 { -			_ = maybeInvertBranches(blk, b.reversePostOrderedBasicBlocks[i+1]) +			_ = maybeInvertBranches(b, blk, b.reversePostOrderedBasicBlocks[i+1])  		}  	} @@ -111,7 +111,7 @@ func passLayoutBlocks(b *builder) {  			}  			fallthroughBranch := blk.currentInstr -			if fallthroughBranch.opcode == OpcodeJump && fallthroughBranch.blk == trampoline { +			if fallthroughBranch.opcode == OpcodeJump && BasicBlockID(fallthroughBranch.rValue) == trampoline.id {  				// This can be lowered as fallthrough at the end of the block.  				b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline)  				trampoline.visited = 1 // mark as inserted. @@ -157,7 +157,7 @@ func (b *builder) markFallthroughJumps() {  	for i, blk := range b.reversePostOrderedBasicBlocks {  		if i < l {  			cur := blk.currentInstr -			if cur.opcode == OpcodeJump && cur.blk == b.reversePostOrderedBasicBlocks[i+1] { +			if cur.opcode == OpcodeJump && BasicBlockID(cur.rValue) == b.reversePostOrderedBasicBlocks[i+1].id {  				cur.AsFallthroughJump()  			}  		} @@ -168,7 +168,7 @@ func (b *builder) markFallthroughJumps() {  // nextInRPO is the next block in the reverse post-order.  //  // Returns true if the branch is inverted for testing purpose. -func maybeInvertBranches(now *basicBlock, nextInRPO *basicBlock) bool { +func maybeInvertBranches(b *builder, now *basicBlock, nextInRPO *basicBlock) bool {  	fallthroughBranch := now.currentInstr  	if fallthroughBranch.opcode == OpcodeBrTable {  		return false @@ -187,7 +187,8 @@ func maybeInvertBranches(now *basicBlock, nextInRPO *basicBlock) bool {  	// So this block has two branches (a conditional branch followed by an unconditional branch) at the end.  	// We can invert the condition of the branch if it makes the fallthrough more likely. 
-	fallthroughTarget, condTarget := fallthroughBranch.blk.(*basicBlock), condBranch.blk.(*basicBlock) +	fallthroughTarget := b.basicBlock(BasicBlockID(fallthroughBranch.rValue)) +	condTarget := b.basicBlock(BasicBlockID(condBranch.rValue))  	if fallthroughTarget.loopHeader {  		// First, if the tail's target is loopHeader, we don't need to do anything here, @@ -231,8 +232,8 @@ invert:  	}  	condBranch.InvertBrx() -	condBranch.blk = fallthroughTarget -	fallthroughBranch.blk = condTarget +	condBranch.rValue = Value(fallthroughTarget.ID()) +	fallthroughBranch.rValue = Value(condTarget.ID())  	if wazevoapi.SSALoggingEnabled {  		fmt.Printf("inverting branches at %d->%d and %d->%d\n",  			now.ID(), fallthroughTarget.ID(), now.ID(), condTarget.ID()) @@ -275,7 +276,7 @@ func (b *builder) splitCriticalEdge(pred, succ *basicBlock, predInfo *basicBlock  	// Replace originalBranch with the newBranch.  	newBranch := b.AllocateInstruction()  	newBranch.opcode = originalBranch.opcode -	newBranch.blk = trampoline +	newBranch.rValue = Value(trampoline.ID())  	switch originalBranch.opcode {  	case OpcodeJump:  	case OpcodeBrz, OpcodeBrnz: @@ -303,7 +304,7 @@ func (b *builder) splitCriticalEdge(pred, succ *basicBlock, predInfo *basicBlock  		trampoline.validate(b)  	} -	if len(trampoline.params) > 0 { +	if len(trampoline.params.View()) > 0 {  		panic("trampoline should not have params")  	} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/vs.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/vs.go index bcf83cbf8..d906e7e35 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/vs.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/vs.go @@ -15,17 +15,31 @@ import (  //  // Variable is useful to track the SSA Values of a variable in the source program, and  // can be used to find the corresponding latest SSA Value via Builder.FindValue. +// +// Higher 4-bit is used to store Type for this variable.  type Variable uint32  // String implements fmt.Stringer.  func (v Variable) String() string { -	return fmt.Sprintf("var%d", v) +	return fmt.Sprintf("var%d", v&0x0fffffff) +} + +func (v Variable) setType(typ Type) Variable { +	if v >= 1<<28 { +		panic(fmt.Sprintf("Too large variable: %d", v)) +	} +	return Variable(typ)<<28 | v +} + +func (v Variable) getType() Type { +	return Type(v >> 28)  }  // Value represents an SSA value with a type information. The relationship with Variable is 1: N (including 0),  // that means there might be multiple Variable(s) for a Value.  // -// Higher 32-bit is used to store Type for this value. +// 32 to 59-bit is used to store the unique identifier of the Instruction that generates this value if any. +// 60 to 63-bit is used to store Type for this value.  type Value uint64  // ValueID is the lower 32bit of Value, which is the pure identifier of Value without type info. @@ -33,7 +47,7 @@ type ValueID uint32  const (  	valueIDInvalid ValueID = math.MaxUint32 -	ValueInvalid   Value   = Value(valueIDInvalid) +	ValueInvalid           = Value(valueIDInvalid)  )  // Format creates a debug string for this Value using the data stored in Builder. @@ -54,7 +68,7 @@ func (v Value) formatWithType(b Builder) (ret string) {  	if wazevoapi.SSALoggingEnabled { // This is useful to check live value analysis bugs.  		
if bd := b.(*builder); bd.donePostBlockLayoutPasses {  			id := v.ID() -			ret += fmt.Sprintf("(ref=%d)", bd.valueRefCounts[id]) +			ret += fmt.Sprintf("(ref=%d)", bd.valuesInfo[id].RefCount)  		}  	}  	return ret @@ -67,7 +81,7 @@ func (v Value) Valid() bool {  // Type returns the Type of this value.  func (v Value) Type() Type { -	return Type(v >> 32) +	return Type(v >> 60)  }  // ID returns the valueID of this value. @@ -77,7 +91,20 @@ func (v Value) ID() ValueID {  // setType sets a type to this Value and returns the updated Value.  func (v Value) setType(typ Type) Value { -	return v | Value(typ)<<32 +	return v | Value(typ)<<60 +} + +// setInstructionID sets an Instruction.id to this Value and returns the updated Value. +func (v Value) setInstructionID(id int) Value { +	if id < 0 || uint(id) >= 1<<28 { +		panic(fmt.Sprintf("Too large instruction ID: %d", id)) +	} +	return v | Value(id)<<32 +} + +// instructionID() returns the Instruction.id of this Value. +func (v Value) instructionID() int { +	return int(v>>32) & 0x0fffffff  }  // Values is a slice of Value. Use this instead of []Value to reuse the underlying memory. diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/resetmap.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/resetmap.go index 7177fbb4b..3fc7aa143 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/resetmap.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/resetmap.go @@ -5,9 +5,7 @@ func ResetMap[K comparable, V any](m map[K]V) map[K]V {  	if m == nil {  		m = make(map[K]V)  	} else { -		for v := range m { -			delete(m, v) -		} +		clear(m)  	}  	return m  } | 
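The pass.go hunk above replaces the map-based redundantParameterIndexToValue bookkeeping with a slice of redundantParam entries whose indexes are produced in ascending scan order, so both the branch arguments and the block parameters can be compacted with two cursors and no map lookups or per-iteration map clearing. A minimal standalone sketch of that filtering pattern, assuming the index list is sorted ascending; the function and variable names here are illustrative and not part of wazero:

package main

import "fmt"

// removeAtSortedIndexes compacts vs in place, dropping the elements whose
// positions appear in remove. remove must be sorted in ascending order,
// which holds in the pass above because redundant indexes are appended in
// scan order.
func removeAtSortedIndexes(vs []int, remove []int) []int {
	cur, rcur := 0, 0
	for i, v := range vs {
		if rcur < len(remove) && remove[rcur] == i {
			rcur++ // this position is redundant; skip it
			continue
		}
		vs[cur] = v
		cur++
	}
	// Truncation plays the role of branchInst.vs.Cut(cur) / blk.params.Cut(cur).
	return vs[:cur]
}

func main() {
	args := []int{10, 20, 30, 40, 50}
	fmt.Println(removeAtSortedIndexes(args, []int{1, 3})) // [10 30 50]
}

The same two-cursor shape appears twice in the hunk, once over branchInst.vs for every predecessor and once over blk.params, which is why the redundantParams slice can simply be truncated and reused for the next block instead of rebuilding a map.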

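The vs.go hunk above packs three fields into a single 64-bit Value: the low 32 bits keep the ValueID, bits 32 to 59 carry the producing Instruction's id, and the top 4 bits carry the Type; Variable gets a similar treatment, with its Type stored in the top 4 bits of the uint32. A minimal sketch of that layout under the same bit boundaries; the type and function names below are illustrative, not wazero's API:

package main

import "fmt"

// packedValue mirrors the layout described in vs.go above: bits 0..31 hold
// the value ID, bits 32..59 the producing instruction ID, and bits 60..63 a
// 4-bit type tag.
type packedValue uint64

type typeTag uint8

const (
	typeInvalid typeTag = iota
	typeI32
	typeI64
)

func newPackedValue(id uint32, instrID int, t typeTag) packedValue {
	if instrID < 0 || uint(instrID) >= 1<<28 {
		panic("instruction ID must fit in 28 bits")
	}
	return packedValue(id) | packedValue(instrID)<<32 | packedValue(t)<<60
}

func (v packedValue) id() uint32         { return uint32(v) }
func (v packedValue) instructionID() int { return int(v>>32) & 0x0fffffff }
func (v packedValue) typ() typeTag       { return typeTag(v >> 60) }

func main() {
	v := newPackedValue(7, 1234, typeI64)
	fmt.Println(v.id(), v.instructionID(), v.typ()) // 7 1234 2
}

Carrying the producing instruction's id inside the value appears to be what lets InstructionOfValue replace the old valueIDToInstruction side table in the dead-code and nop-elimination passes shown earlier, so those passes no longer need a pre-scan that records every instruction's return values.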