3 files changed, 281 insertions, 306 deletions
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go
index 23157b478..5d15bd9dc 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go
@@ -4,104 +4,100 @@ import "fmt"
 
 // These interfaces are implemented by ISA-specific backends to abstract away the details, and allow the register
 // allocators to work on any ISA.
-//
-// TODO: the interfaces are not stabilized yet, especially x64 will need some changes. E.g. x64 has an addressing mode
-// 	where index can be in memory. That kind of info will be useful to reduce the register pressure, and should be leveraged
-// 	by the register allocators, like https://docs.rs/regalloc2/latest/regalloc2/enum.OperandConstraint.html
 
 type (
 	// Function is the top-level interface to do register allocation, which corresponds to a CFG containing
 	// Blocks(s).
-	Function interface {
+	//
+	// I is the type of the instruction, and B is the type of the basic block.
+	Function[I Instr, B Block[I]] interface {
 		// PostOrderBlockIteratorBegin returns the first block in the post-order traversal of the CFG.
 		// In other words, the last blocks in the CFG will be returned first.
-		PostOrderBlockIteratorBegin() Block
+		PostOrderBlockIteratorBegin() B
 		// PostOrderBlockIteratorNext returns the next block in the post-order traversal of the CFG.
-		PostOrderBlockIteratorNext() Block
+		PostOrderBlockIteratorNext() B
 		// ReversePostOrderBlockIteratorBegin returns the first block in the reverse post-order traversal of the CFG.
 		// In other words, the first blocks in the CFG will be returned first.
-		ReversePostOrderBlockIteratorBegin() Block
+		ReversePostOrderBlockIteratorBegin() B
 		// ReversePostOrderBlockIteratorNext returns the next block in the reverse post-order traversal of the CFG.
-		ReversePostOrderBlockIteratorNext() Block
+		ReversePostOrderBlockIteratorNext() B
 		// ClobberedRegisters tell the clobbered registers by this function.
 		ClobberedRegisters([]VReg)
 		// LoopNestingForestRoots returns the number of roots of the loop nesting forest in a function.
 		LoopNestingForestRoots() int
 		// LoopNestingForestRoot returns the i-th root of the loop nesting forest in a function.
-		LoopNestingForestRoot(i int) Block
+		LoopNestingForestRoot(i int) B
 		// LowestCommonAncestor returns the lowest common ancestor of two blocks in the dominator tree.
-		LowestCommonAncestor(blk1, blk2 Block) Block
+		LowestCommonAncestor(blk1, blk2 B) B
 		// Idom returns the immediate dominator of the given block.
-		Idom(blk Block) Block
+		Idom(blk B) B
+
+		// LoopNestingForestChild returns the i-th child of the block in the loop nesting forest.
+		LoopNestingForestChild(b B, i int) B
+		// Pred returns the i-th predecessor of the block in the CFG.
+		Pred(b B, i int) B
+		// Succ returns the i-th successor of the block in the CFG.
+		Succ(b B, i int) B
+		// BlockParams returns the virtual registers used as the parameters of this block.
+		BlockParams(B, *[]VReg) []VReg
 
 		// Followings are for rewriting the function.
 
-		// SwapAtEndOfBlock swaps the two virtual registers at the end of the given block.
-		SwapBefore(x1, x2, tmp VReg, instr Instr)
+		// SwapBefore swaps the two virtual registers at the end of the given block.
+		SwapBefore(x1, x2, tmp VReg, instr I)
 		// StoreRegisterBefore inserts store instruction(s) before the given instruction for the given virtual register.
-		StoreRegisterBefore(v VReg, instr Instr)
+		StoreRegisterBefore(v VReg, instr I)
 		// StoreRegisterAfter inserts store instruction(s) after the given instruction for the given virtual register.
-		StoreRegisterAfter(v VReg, instr Instr)
+		StoreRegisterAfter(v VReg, instr I)
 		// ReloadRegisterBefore inserts reload instruction(s) before the given instruction for the given virtual register.
-		ReloadRegisterBefore(v VReg, instr Instr)
+		ReloadRegisterBefore(v VReg, instr I)
 		// ReloadRegisterAfter inserts reload instruction(s) after the given instruction for the given virtual register.
-		ReloadRegisterAfter(v VReg, instr Instr)
+		ReloadRegisterAfter(v VReg, instr I)
 		// InsertMoveBefore inserts move instruction(s) before the given instruction for the given virtual registers.
-		InsertMoveBefore(dst, src VReg, instr Instr)
+		InsertMoveBefore(dst, src VReg, instr I)
 	}
 
 	// Block is a basic block in the CFG of a function, and it consists of multiple instructions, and predecessor Block(s).
-	Block interface {
+	// Right now, this corresponds to a ssa.BasicBlock lowered to the machine level.
+	Block[I Instr] interface {
+		comparable
 		// ID returns the unique identifier of this block which is ordered in the reverse post-order traversal of the CFG.
 		ID() int32
-		// BlockParams returns the virtual registers used as the parameters of this block.
-		BlockParams(*[]VReg) []VReg
 		// InstrIteratorBegin returns the first instruction in this block. Instructions added after lowering must be skipped.
 		// Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr.
-		InstrIteratorBegin() Instr
+		InstrIteratorBegin() I
 		// InstrIteratorNext returns the next instruction in this block. Instructions added after lowering must be skipped.
 		// Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr.
-		InstrIteratorNext() Instr
+		InstrIteratorNext() I
 		// InstrRevIteratorBegin is the same as InstrIteratorBegin, but in the reverse order.
-		InstrRevIteratorBegin() Instr
+		InstrRevIteratorBegin() I
 		// InstrRevIteratorNext is the same as InstrIteratorNext, but in the reverse order.
-		InstrRevIteratorNext() Instr
+		InstrRevIteratorNext() I
 		// FirstInstr returns the fist instruction in this block where instructions will be inserted after it.
-		FirstInstr() Instr
-		// EndInstr returns the end instruction in this block.
-		EndInstr() Instr
+		FirstInstr() I
 		// LastInstrForInsertion returns the last instruction in this block where instructions will be inserted before it.
 		// Such insertions only happen when we need to insert spill/reload instructions to adjust the merge edges.
 		// At the time of register allocation, all the critical edges are already split, so there is no need
 		// to worry about the case where branching instruction has multiple successors.
 		// Therefore, usually, it is the nop instruction, but if the block ends with an unconditional branching, then it returns
 		// the unconditional branch, not the nop. In other words it is either nop or unconditional branch.
-		LastInstrForInsertion() Instr
+		LastInstrForInsertion() I
 		// Preds returns the number of predecessors of this block in the CFG.
 		Preds() int
-		// Pred returns the i-th predecessor of this block in the CFG.
-		Pred(i int) Block
 		// Entry returns true if the block is for the entry block.
 		Entry() bool
 		// Succs returns the number of successors of this block in the CFG.
 		Succs() int
-		// Succ returns the i-th successor of this block in the CFG.
-		Succ(i int) Block
 		// LoopHeader returns true if this block is a loop header.
 		LoopHeader() bool
 		// LoopNestingForestChildren returns the number of children of this block in the loop nesting forest.
 		LoopNestingForestChildren() int
-		// LoopNestingForestChild returns the i-th child of this block in the loop nesting forest.
-		LoopNestingForestChild(i int) Block
 	}
 
 	// Instr is an instruction in a block, abstracting away the underlying ISA.
 	Instr interface {
+		comparable
 		fmt.Stringer
-		// Next returns the next instruction in the same block.
-		Next() Instr
-		// Prev returns the previous instruction in the same block.
-		Prev() Instr
 		// Defs returns the virtual registers defined by this instruction.
 		Defs(*[]VReg) []VReg
 		// Uses returns the virtual registers used by this instruction.
@@ -124,13 +120,5 @@ type (
 		IsIndirectCall() bool
 		// IsReturn returns true if this instruction is a return instruction.
 		IsReturn() bool
-		// AddedBeforeRegAlloc returns true if this instruction is added before register allocation.
-		AddedBeforeRegAlloc() bool
-	}
-
-	// InstrConstraint is an interface for arch-specific instruction constraints.
-	InstrConstraint interface {
-		comparable
-		Instr
 	}
 )
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
index eacb6a7ef..a5857f4f2 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
@@ -18,13 +18,13 @@ import (
 )
 
 // NewAllocator returns a new Allocator.
-func NewAllocator(allocatableRegs *RegisterInfo) Allocator {
-	a := Allocator{
+func NewAllocator[I Instr, B Block[I], F Function[I, B]](allocatableRegs *RegisterInfo) Allocator[I, B, F] {
+	a := Allocator[I, B, F]{
 		regInfo:            allocatableRegs,
-		phiDefInstListPool: wazevoapi.NewPool[phiDefInstList](resetPhiDefInstList),
-		blockStates:        wazevoapi.NewIDedPool[blockState](resetBlockState),
+		phiDefInstListPool: wazevoapi.NewPool[phiDefInstList[I]](resetPhiDefInstList[I]),
+		blockStates:        wazevoapi.NewIDedPool[blockState[I, B, F]](resetBlockState[I, B, F]),
 	}
-	a.state.vrStates = wazevoapi.NewIDedPool[vrState](resetVrState)
+	a.state.vrStates = wazevoapi.NewIDedPool[vrState[I, B, F]](resetVrState[I, B, F])
 	a.state.reset()
 	for _, regs := range allocatableRegs.AllocatableRegisters {
 		for _, r := range regs {
@@ -49,32 +49,39 @@ type (
 	}
 
 	// Allocator is a register allocator.
-	Allocator struct {
+	Allocator[I Instr, B Block[I], F Function[I, B]] struct {
 		// regInfo is static per ABI/ISA, and is initialized by the machine during Machine.PrepareRegisterAllocator.
 		regInfo *RegisterInfo
 		// allocatableSet is a set of allocatable RealReg derived from regInfo. Static per ABI/ISA.
 		allocatableSet           RegSet
 		allocatedCalleeSavedRegs []VReg
 		vs                       []VReg
-		vs2                      []VRegID
-		phiDefInstListPool       wazevoapi.Pool[phiDefInstList]
+		ss                       []*vrState[I, B, F]
+		copies                   []_copy[I, B, F]
+		phiDefInstListPool       wazevoapi.Pool[phiDefInstList[I]]
 
 		// Followings are re-used during various places.
-		blks  []Block
+		blks  []B
 		reals []RealReg
 
 		// Following two fields are updated while iterating the blocks in the reverse postorder.
-		state       state
-		blockStates wazevoapi.IDedPool[blockState]
+		state       state[I, B, F]
+		blockStates wazevoapi.IDedPool[blockState[I, B, F]]
+	}
+
+	// _copy represents a source and destination pair of a copy instruction.
+	_copy[I Instr, B Block[I], F Function[I, B]] struct {
+		src   *vrState[I, B, F]
+		dstID VRegID
 	}
 
 	// programCounter represents an opaque index into the program which is used to represents a LiveInterval of a VReg.
 	programCounter int32
 
-	state struct {
+	state[I Instr, B Block[I], F Function[I, B]] struct {
 		argRealRegs []VReg
-		regsInUse   regInUseSet
-		vrStates    wazevoapi.IDedPool[vrState]
+		regsInUse   regInUseSet[I, B, F]
+		vrStates    wazevoapi.IDedPool[vrState[I, B, F]]
 
 		currentBlockID int32
 
@@ -82,30 +89,30 @@ type (
 		allocatedRegSet RegSet
 	}
 
-	blockState struct {
+	blockState[I Instr, B Block[I], F Function[I, B]] struct {
 		// liveIns is a list of VReg that are live at the beginning of the block.
-		liveIns []VRegID
+		liveIns []*vrState[I, B, F]
 		// seen is true if the block is visited during the liveness analysis.
 		seen bool
 		// visited is true if the block is visited during the allocation phase.
 		visited            bool
 		startFromPredIndex int
 		// startRegs is a list of RealReg that are used at the beginning of the block. This is used to fix the merge edges.
-		startRegs regInUseSet
+		startRegs regInUseSet[I, B, F]
 		// endRegs is a list of RealReg that are used at the end of the block. This is used to fix the merge edges.
-		endRegs regInUseSet
+		endRegs regInUseSet[I, B, F]
 	}
 
-	vrState struct {
+	vrState[I Instr, B Block[I], f Function[I, B]] struct {
 		v VReg
 		r RealReg
 		// defInstr is the instruction that defines this value. If this is the phi value and not the entry block, this is nil.
-		defInstr Instr
+		defInstr I
 		// defBlk is the block that defines this value. If this is the phi value, this is the block whose arguments contain this value.
-		defBlk Block
+		defBlk B
 		// lca = lowest common ancestor. This is the block that is the lowest common ancestor of all the blocks that
 		// reloads this value. This is used to determine the spill location. Only valid if spilled=true.
-		lca Block
+		lca B
 		// lastUse is the program counter of the last use of this value. This changes while iterating the block, and
 		// should not be used across the blocks as it becomes invalid. To check the validity, use lastUseUpdatedAtBlockID.
 		lastUse                 programCounter
@@ -120,14 +127,14 @@ type (
 		desiredLoc desiredLoc
 		// phiDefInstList is a list of instructions that defines this phi value.
 		// This is used to determine the spill location, and only valid if isPhi=true.
-		*phiDefInstList
+		*phiDefInstList[I]
 	}
 
 	// phiDefInstList is a linked list of instructions that defines a phi value.
-	phiDefInstList struct {
-		instr Instr
+	phiDefInstList[I Instr] struct {
+		instr I
 		v     VReg
-		next  *phiDefInstList
+		next  *phiDefInstList[I]
 	}
 
 	// desiredLoc represents a desired location for a VReg.
@@ -159,13 +166,14 @@ func (d desiredLoc) stack() bool {
 	return d&3 == desiredLoc(desiredLocKindStack)
 }
 
-func resetPhiDefInstList(l *phiDefInstList) {
-	l.instr = nil
+func resetPhiDefInstList[I Instr](l *phiDefInstList[I]) {
+	var nilInstr I
+	l.instr = nilInstr
 	l.next = nil
 	l.v = VRegInvalid
 }
 
-func (s *state) dump(info *RegisterInfo) { //nolint:unused
+func (s *state[I, B, F]) dump(info *RegisterInfo) { //nolint:unused
 	fmt.Println("\t\tstate:")
 	fmt.Println("\t\t\targRealRegs:", s.argRealRegs)
 	fmt.Println("\t\t\tregsInUse", s.regsInUse.format(info))
@@ -184,7 +192,7 @@ func (s *state) dump(info *RegisterInfo) { //nolint:unused
 	fmt.Println("\t\t\tvrStates:", strings.Join(strs, ", "))
 }
 
-func (s *state) reset() {
+func (s *state[I, B, F]) reset() {
 	s.argRealRegs = s.argRealRegs[:0]
 	s.vrStates.Reset()
 	s.allocatedRegSet = RegSet(0)
@@ -192,79 +200,74 @@ func (s *state) reset() {
 	s.currentBlockID = -1
 }
 
-func (s *state) setVRegState(v VReg, r RealReg) {
-	id := int(v.ID())
-	st := s.vrStates.GetOrAllocate(id)
-	st.r = r
-	st.v = v
-}
-
-func resetVrState(vs *vrState) {
+func resetVrState[I Instr, B Block[I], F Function[I, B]](vs *vrState[I, B, F]) {
 	vs.v = VRegInvalid
 	vs.r = RealRegInvalid
-	vs.defInstr = nil
-	vs.defBlk = nil
+	var nilInstr I
+	vs.defInstr = nilInstr
+	var nilBlk B
+	vs.defBlk = nilBlk
 	vs.spilled = false
 	vs.lastUse = -1
 	vs.lastUseUpdatedAtBlockID = -1
-	vs.lca = nil
+	vs.lca = nilBlk
 	vs.isPhi = false
 	vs.phiDefInstList = nil
 	vs.desiredLoc = desiredLocUnspecified
 }
 
-func (s *state) getVRegState(v VRegID) *vrState {
-	return s.vrStates.GetOrAllocate(int(v))
+func (s *state[I, B, F]) getOrAllocateVRegState(v VReg) *vrState[I, B, F] {
+	st := s.vrStates.GetOrAllocate(int(v.ID()))
+	if st.v == VRegInvalid {
+		st.v = v
+	}
+	return st
 }
 
-func (s *state) useRealReg(r RealReg, v VReg) {
-	if s.regsInUse.has(r) {
-		panic("BUG: useRealReg: the given real register is already used")
-	}
-	s.regsInUse.add(r, v)
-	s.setVRegState(v, r)
+func (s *state[I, B, F]) getVRegState(v VRegID) *vrState[I, B, F] {
+	return s.vrStates.Get(int(v))
+}
+
+func (s *state[I, B, F]) useRealReg(r RealReg, vr *vrState[I, B, F]) {
+	s.regsInUse.add(r, vr)
+	vr.r = r
 	s.allocatedRegSet = s.allocatedRegSet.add(r)
 }
 
-func (s *state) releaseRealReg(r RealReg) {
+func (s *state[I, B, F]) releaseRealReg(r RealReg) {
 	current := s.regsInUse.get(r)
-	if current.Valid() {
+	if current != nil {
 		s.regsInUse.remove(r)
-		s.setVRegState(current, RealRegInvalid)
+		current.r = RealRegInvalid
 	}
 }
 
 // recordReload records that the given VReg is reloaded in the given block.
 // This is used to determine the spill location by tracking the lowest common ancestor of all the blocks that reloads the value.
-func (vs *vrState) recordReload(f Function, blk Block) {
+func (vs *vrState[I, B, F]) recordReload(f F, blk B) {
 	vs.spilled = true
-	if vs.lca == nil {
+	var nilBlk B
+	if lca := vs.lca; lca == nilBlk {
 		if wazevoapi.RegAllocLoggingEnabled {
 			fmt.Printf("\t\tv%d is reloaded in blk%d,\n", vs.v.ID(), blk.ID())
 		}
 		vs.lca = blk
-	} else {
+	} else if lca != blk {
 		if wazevoapi.RegAllocLoggingEnabled {
 			fmt.Printf("\t\tv%d is reloaded in blk%d, lca=%d\n", vs.v.ID(), blk.ID(), vs.lca.ID())
 		}
-		vs.lca = f.LowestCommonAncestor(vs.lca, blk)
+		vs.lca = f.LowestCommonAncestor(lca, blk)
 		if wazevoapi.RegAllocLoggingEnabled {
 			fmt.Printf("updated lca=%d\n", vs.lca.ID())
 		}
 	}
 }
 
-func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forbiddenMask RegSet, preferred RealReg) (r RealReg) {
+func (a *Allocator[I, B, F]) findOrSpillAllocatable(s *state[I, B, F], allocatable []RealReg, forbiddenMask RegSet, preferred RealReg) (r RealReg) {
 	r = RealRegInvalid
 	// First, check if the preferredMask has any allocatable register.
 	if preferred != RealRegInvalid && !forbiddenMask.has(preferred) && !s.regsInUse.has(preferred) {
-		for _, candidateReal := range allocatable {
-			// TODO: we should ensure the preferred register is in the allocatable set in the first place,
-			//  but right now, just in case, we check it here.
-			if candidateReal == preferred {
-				return preferred
-			}
-		}
+		return preferred
 	}
 
 	var lastUseAt programCounter
@@ -275,7 +278,7 @@ func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forb
 		}
 
 		using := s.regsInUse.get(candidateReal)
-		if using == VRegInvalid {
+		if using == nil {
 			// This is not used at this point.
 			return candidateReal
 		}
@@ -284,17 +287,17 @@ func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forb
 		// For example, if the register is used as an argument register, and it might be
 		// spilled and not reloaded when it ends up being used as a temporary to pass
 		// stack based argument.
-		if using.IsRealReg() {
+		if using.v.IsRealReg() {
 			continue
 		}
 
 		isPreferred := candidateReal == preferred
 
 		// last == -1 means the value won't be used anymore.
-		if last := s.getVRegState(using.ID()).lastUse; r == RealRegInvalid || isPreferred || last == -1 || (lastUseAt != -1 && last > lastUseAt) {
+		if last := using.lastUse; r == RealRegInvalid || isPreferred || last == -1 || (lastUseAt != -1 && last > lastUseAt) {
 			lastUseAt = last
 			r = candidateReal
-			spillVReg = using
+			spillVReg = using.v
 			if isPreferred {
 				break
 			}
@@ -312,7 +315,7 @@ func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forb
 	return r
 }
 
-func (s *state) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) RealReg {
+func (s *state[I, B, F]) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) RealReg {
 	for _, r := range allocatable {
 		if !s.regsInUse.has(r) && !forbiddenMask.has(r) {
 			return r
@@ -321,22 +324,20 @@ func (s *state) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) Rea
 	return RealRegInvalid
 }
 
-func (s *state) resetAt(bs *blockState) {
-	s.regsInUse.range_(func(_ RealReg, vr VReg) {
-		s.setVRegState(vr, RealRegInvalid)
+func (s *state[I, B, F]) resetAt(bs *blockState[I, B, F]) {
+	s.regsInUse.range_(func(_ RealReg, vs *vrState[I, B, F]) {
+		vs.r = RealRegInvalid
 	})
 	s.regsInUse.reset()
-	bs.endRegs.range_(func(r RealReg, v VReg) {
-		id := int(v.ID())
-		st := s.vrStates.GetOrAllocate(id)
-		if st.lastUseUpdatedAtBlockID == s.currentBlockID && st.lastUse == programCounterLiveIn {
-			s.regsInUse.add(r, v)
-			s.setVRegState(v, r)
+	bs.endRegs.range_(func(r RealReg, vs *vrState[I, B, F]) {
+		if vs.lastUseUpdatedAtBlockID == s.currentBlockID && vs.lastUse == programCounterLiveIn {
+			s.regsInUse.add(r, vs)
+			vs.r = r
 		}
 	})
 }
 
-func resetBlockState(b *blockState) {
+func resetBlockState[I Instr, B Block[I], F Function[I, B]](b *blockState[I, B, F]) {
 	b.seen = false
 	b.visited = false
 	b.endRegs.reset()
@@ -345,7 +346,7 @@ func resetBlockState(b *blockState) {
 	b.liveIns = b.liveIns[:0]
 }
 
-func (b *blockState) dump(a *RegisterInfo) {
+func (b *blockState[I, B, F]) dump(a *RegisterInfo) {
 	fmt.Println("\t\tblockState:")
 	fmt.Println("\t\t\tstartRegs:", b.startRegs.format(a))
 	fmt.Println("\t\t\tendRegs:", b.endRegs.format(a))
@@ -354,13 +355,13 @@ func (b *blockState) dump(a *RegisterInfo) {
 }
 
 // DoAllocation performs register allocation on the given Function.
-func (a *Allocator) DoAllocation(f Function) {
+func (a *Allocator[I, B, F]) DoAllocation(f F) {
 	a.livenessAnalysis(f)
 	a.alloc(f)
 	a.determineCalleeSavedRealRegs(f)
 }
 
-func (a *Allocator) determineCalleeSavedRealRegs(f Function) {
+func (a *Allocator[I, B, F]) determineCalleeSavedRealRegs(f F) {
 	a.allocatedCalleeSavedRegs = a.allocatedCalleeSavedRegs[:0]
 	a.state.allocatedRegSet.Range(func(allocatedRealReg RealReg) {
 		if a.regInfo.CalleeSavedRegisters.has(allocatedRealReg) {
@@ -370,17 +371,17 @@ func (a *Allocator) determineCalleeSavedRealRegs(f Function) {
 	f.ClobberedRegisters(a.allocatedCalleeSavedRegs)
 }
 
-func (a *Allocator) getOrAllocateBlockState(blockID int32) *blockState {
+func (a *Allocator[I, B, F]) getOrAllocateBlockState(blockID int32) *blockState[I, B, F] {
 	return a.blockStates.GetOrAllocate(int(blockID))
 }
 
 // phiBlk returns the block that defines the given phi value, nil otherwise.
-func (s *state) phiBlk(v VRegID) Block {
-	vs := s.getVRegState(v)
+func (vs *vrState[I, B, F]) phiBlk() B {
 	if vs.isPhi {
 		return vs.defBlk
 	}
-	return nil
+	var nilBlk B
+	return nilBlk
 }
 
 const (
@@ -390,31 +391,35 @@ const (
 
 // liveAnalysis constructs Allocator.blockLivenessData.
 // The algorithm here is described in https://pfalcon.github.io/ssabook/latest/book-full.pdf Chapter 9.2.
-func (a *Allocator) livenessAnalysis(f Function) {
+func (a *Allocator[I, B, F]) livenessAnalysis(f F) {
 	s := &a.state
-	for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() { // Order doesn't matter.
 
+	for i := VRegID(0); i < vRegIDReservedForRealNum; i++ {
+		s.getOrAllocateVRegState(VReg(i).SetRealReg(RealReg(i)))
+	}
+
+	var nilBlk B
+	var nilInstr I
+	for blk := f.PostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.PostOrderBlockIteratorNext() {
 		// We should gather phi value data.
-		for _, p := range blk.BlockParams(&a.vs) {
-			vs := s.getVRegState(p.ID())
+		for _, p := range f.BlockParams(blk, &a.vs) {
+			vs := s.getOrAllocateVRegState(p)
 			vs.isPhi = true
 			vs.defBlk = blk
 		}
-	}
 
-	for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() {
 		blkID := blk.ID()
 		info := a.getOrAllocateBlockState(blkID)
 
-		a.vs2 = a.vs2[:0]
+		a.ss = a.ss[:0]
 		const (
 			flagDeleted = false
 			flagLive    = true
 		)
 		ns := blk.Succs()
 		for i := 0; i < ns; i++ {
-			succ := blk.Succ(i)
-			if succ == nil {
+			succ := f.Succ(blk, i)
+			if succ == nilBlk {
 				continue
 			}
 
@@ -424,39 +429,39 @@ func (a *Allocator) livenessAnalysis(f Function) {
 				continue
 			}
 
-			for _, v := range succInfo.liveIns {
-				if s.phiBlk(v) != succ {
-					st := s.getVRegState(v)
+			for _, st := range succInfo.liveIns {
+				if st.phiBlk() != succ && st.spilled != flagLive { //nolint:gosimple
 					// We use .spilled field to store the flag.
 					st.spilled = flagLive
-					a.vs2 = append(a.vs2, v)
+					a.ss = append(a.ss, st)
 				}
 			}
 		}
 
-		for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() {
+		for instr := blk.InstrRevIteratorBegin(); instr != nilInstr; instr = blk.InstrRevIteratorNext() {
 
 			var use, def VReg
+			var defIsPhi bool
 			for _, def = range instr.Defs(&a.vs) {
 				if !def.IsRealReg() {
-					id := def.ID()
-					st := s.getVRegState(id)
-					// We use .spilled field to store the flag.
+					st := s.getOrAllocateVRegState(def)
+					defIsPhi = st.isPhi
+					// Note: We use .spilled field to store the flag.
 					st.spilled = flagDeleted
-					a.vs2 = append(a.vs2, id)
 				}
 			}
 			for _, use = range instr.Uses(&a.vs) {
 				if !use.IsRealReg() {
-					id := use.ID()
-					st := s.getVRegState(id)
-					// We use .spilled field to store the flag.
-					st.spilled = flagLive
-					a.vs2 = append(a.vs2, id)
+					st := s.getOrAllocateVRegState(use)
+					// Note: We use .spilled field to store the flag.
+					if st.spilled != flagLive { //nolint:gosimple
+						st.spilled = flagLive
+						a.ss = append(a.ss, st)
+					}
 				}
 			}
 
-			if def.Valid() && s.phiBlk(def.ID()) != nil {
+			if defIsPhi {
 				if use.Valid() && use.IsRealReg() {
 					// If the destination is a phi value, and the source is a real register, this is the beginning of the function.
 					a.state.argRealRegs = append(a.state.argRealRegs, use)
@@ -464,11 +469,10 @@ func (a *Allocator) livenessAnalysis(f Function) {
 			}
 		}
 
-		for _, v := range a.vs2 {
-			st := s.getVRegState(v)
+		for _, st := range a.ss {
 			// We use .spilled field to store the flag.
 			if st.spilled == flagLive { //nolint:gosimple
-				info.liveIns = append(info.liveIns, v)
+				info.liveIns = append(info.liveIns, st)
 				st.spilled = false
 			}
 		}
@@ -479,51 +483,48 @@ func (a *Allocator) livenessAnalysis(f Function) {
 	nrs := f.LoopNestingForestRoots()
 	for i := 0; i < nrs; i++ {
 		root := f.LoopNestingForestRoot(i)
-		a.loopTreeDFS(root)
+		a.loopTreeDFS(f, root)
 	}
 }
 
 // loopTreeDFS implements the Algorithm 9.3 in the book in an iterative way.
-func (a *Allocator) loopTreeDFS(entry Block) {
+func (a *Allocator[I, B, F]) loopTreeDFS(f F, entry B) {
 	a.blks = a.blks[:0]
 	a.blks = append(a.blks, entry)
 
-	s := &a.state
 	for len(a.blks) > 0 {
 		tail := len(a.blks) - 1
 		loop := a.blks[tail]
 		a.blks = a.blks[:tail]
-		a.vs2 = a.vs2[:0]
+		a.ss = a.ss[:0]
 		const (
 			flagDone    = false
 			flagPending = true
 		)
 		info := a.getOrAllocateBlockState(loop.ID())
-		for _, v := range info.liveIns {
-			if s.phiBlk(v) != loop {
-				a.vs2 = append(a.vs2, v)
-				st := s.getVRegState(v)
+		for _, st := range info.liveIns {
+			if st.phiBlk() != loop {
+				a.ss = append(a.ss, st)
 				// We use .spilled field to store the flag.
 				st.spilled = flagPending
 			}
 		}
 
-		var siblingAddedView []VRegID
+		var siblingAddedView []*vrState[I, B, F]
 		cn := loop.LoopNestingForestChildren()
 		for i := 0; i < cn; i++ {
-			child := loop.LoopNestingForestChild(i)
+			child := f.LoopNestingForestChild(loop, i)
 			childID := child.ID()
 			childInfo := a.getOrAllocateBlockState(childID)
 
 			if i == 0 {
 				begin := len(childInfo.liveIns)
-				for _, v := range a.vs2 {
-					st := s.getVRegState(v)
+				for _, st := range a.ss {
 					// We use .spilled field to store the flag.
 					if st.spilled == flagPending { //nolint:gosimple
 						st.spilled = flagDone
 						// TODO: deduplicate, though I don't think it has much impact.
-						childInfo.liveIns = append(childInfo.liveIns, v)
+						childInfo.liveIns = append(childInfo.liveIns, st)
 					}
 				}
 				siblingAddedView = childInfo.liveIns[begin:]
@@ -539,8 +540,7 @@ func (a *Allocator) loopTreeDFS(entry Block) {
 
 		if cn == 0 {
 			// If there's no forest child, we haven't cleared the .spilled field at this point.
-			for _, v := range a.vs2 {
-				st := s.getVRegState(v)
+			for _, st := range a.ss {
 				st.spilled = false
 			}
 		}
@@ -557,37 +557,36 @@ func (a *Allocator) loopTreeDFS(entry Block) {
 // the spill happens in the block that is the lowest common ancestor of all the blocks that reloads the value.
 //
 // All of these logics are almost the same as Go's compiler which has a dedicated description in the source file ^^.
-func (a *Allocator) alloc(f Function) {
+func (a *Allocator[I, B, F]) alloc(f F) {
 	// First we allocate each block in the reverse postorder (at least one predecessor should be allocated for each block).
-	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() {
+	var nilBlk B
+	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.ReversePostOrderBlockIteratorNext() {
 		if wazevoapi.RegAllocLoggingEnabled {
 			fmt.Printf("========== allocating blk%d ========\n", blk.ID())
 		}
 		if blk.Entry() {
-			a.finalizeStartReg(blk)
+			a.finalizeStartReg(f, blk)
 		}
 		a.allocBlock(f, blk)
 	}
 	// After the allocation, we all know the start and end state of each block. So we can fix the merge states.
-	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() {
+	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.ReversePostOrderBlockIteratorNext() {
 		a.fixMergeState(f, blk)
 	}
 	// Finally, we insert the spill instructions as we know all the places where the reloads happen.
 	a.scheduleSpills(f)
 }
 
-func (a *Allocator) updateLiveInVRState(liveness *blockState) {
+func (a *Allocator[I, B, F]) updateLiveInVRState(liveness *blockState[I, B, F]) {
 	currentBlockID := a.state.currentBlockID
-	for _, v := range liveness.liveIns {
-		vs := a.state.getVRegState(v)
+	for _, vs := range liveness.liveIns {
 		vs.lastUse = programCounterLiveIn
 		vs.lastUseUpdatedAtBlockID = currentBlockID
 	}
 }
 
-func (a *Allocator) finalizeStartReg(blk Block) {
+func (a *Allocator[I, B, F]) finalizeStartReg(f F, blk B) {
 	bID := blk.ID()
-	liveness := a.getOrAllocateBlockState(bID)
 	s := &a.state
 	currentBlkState := a.getOrAllocateBlockState(bID)
 	if currentBlkState.startFromPredIndex > -1 {
@@ -595,20 +594,20 @@ func (a *Allocator) finalizeStartReg(blk Block) {
 	}
 
 	s.currentBlockID = bID
-	a.updateLiveInVRState(liveness)
+	a.updateLiveInVRState(currentBlkState)
 
 	preds := blk.Preds()
-	var predState *blockState
+	var predState *blockState[I, B, F]
 	switch preds {
 	case 0: // This is the entry block.
 	case 1:
-		predID := blk.Pred(0).ID()
+		predID := f.Pred(blk, 0).ID()
 		predState = a.getOrAllocateBlockState(predID)
 		currentBlkState.startFromPredIndex = 0
 	default:
 		// TODO: there should be some better heuristic to choose the predecessor.
 		for i := 0; i < preds; i++ {
-			predID := blk.Pred(i).ID()
+			predID := f.Pred(blk, i).ID()
 			if _predState := a.getOrAllocateBlockState(predID); _predState.visited {
 				predState = _predState
 				currentBlkState.startFromPredIndex = i
@@ -621,18 +620,18 @@ func (a *Allocator) finalizeStartReg(blk Block) {
 			panic(fmt.Sprintf("BUG: at lease one predecessor should be visited for blk%d", blk.ID()))
 		}
 		for _, u := range s.argRealRegs {
-			s.useRealReg(u.RealReg(), u)
+			s.useRealReg(u.RealReg(), s.getVRegState(u.ID()))
 		}
 		currentBlkState.startFromPredIndex = 0
-	} else if predState != nil {
+	} else {
 		if wazevoapi.RegAllocLoggingEnabled {
 			fmt.Printf("allocating blk%d starting from blk%d (on index=%d) \n",
-				bID, blk.Pred(currentBlkState.startFromPredIndex).ID(), currentBlkState.startFromPredIndex)
+				bID, f.Pred(blk, currentBlkState.startFromPredIndex).ID(), currentBlkState.startFromPredIndex)
 		}
 		s.resetAt(predState)
 	}
 
-	s.regsInUse.range_(func(allocated RealReg, v VReg) {
+	s.regsInUse.range_(func(allocated RealReg, v *vrState[I, B, F]) {
 		currentBlkState.startRegs.add(allocated, v)
 	})
 	if wazevoapi.RegAllocLoggingEnabled {
@@ -640,7 +639,7 @@ func (a *Allocator) finalizeStartReg(blk Block) {
 	}
 }
 
-func (a *Allocator) allocBlock(f Function, blk Block) {
+func (a *Allocator[I, B, F]) allocBlock(f F, blk B) {
 	bID := blk.ID()
 	s := &a.state
 	currentBlkState := a.getOrAllocateBlockState(bID)
@@ -651,36 +650,34 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
 	}
 
 	// Clears the previous state.
-	s.regsInUse.range_(func(allocatedRealReg RealReg, vr VReg) {
-		s.setVRegState(vr, RealRegInvalid)
-	})
+	s.regsInUse.range_(func(allocatedRealReg RealReg, vr *vrState[I, B, F]) { vr.r = RealRegInvalid })
 	s.regsInUse.reset()
 	// Then set the start state.
-	currentBlkState.startRegs.range_(func(allocatedRealReg RealReg, vr VReg) {
-		s.useRealReg(allocatedRealReg, vr)
-	})
+	currentBlkState.startRegs.range_(func(allocatedRealReg RealReg, vr *vrState[I, B, F]) { s.useRealReg(allocatedRealReg, vr) })
 
-	desiredUpdated := a.vs2[:0]
+	desiredUpdated := a.ss[:0]
 
 	// Update the last use of each VReg.
+	a.copies = a.copies[:0] // Stores the copy instructions.
 	var pc programCounter
-	for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() {
-		var use, def VReg
-		for _, use = range instr.Uses(&a.vs) {
+	var nilInstr I
+	for instr := blk.InstrIteratorBegin(); instr != nilInstr; instr = blk.InstrIteratorNext() {
+		var useState *vrState[I, B, F]
+		for _, use := range instr.Uses(&a.vs) {
+			useState = s.getVRegState(use.ID())
 			if !use.IsRealReg() {
-				s.getVRegState(use.ID()).lastUse = pc
+				useState.lastUse = pc
 			}
 		}
 
 		if instr.IsCopy() {
-			def = instr.Defs(&a.vs)[0]
+			def := instr.Defs(&a.vs)[0]
+			a.copies = append(a.copies, _copy[I, B, F]{src: useState, dstID: def.ID()})
 			r := def.RealReg()
 			if r != RealRegInvalid {
-				useID := use.ID()
-				vs := s.getVRegState(useID)
-				if !vs.isPhi { // TODO: no idea why do we need this.
-					vs.desiredLoc = newDesiredLocReg(r)
-					desiredUpdated = append(desiredUpdated, useID)
+				if !useState.isPhi { // TODO: no idea why do we need this.
+					useState.desiredLoc = newDesiredLocReg(r)
+					desiredUpdated = append(desiredUpdated, useState)
 				}
 			}
 		}
@@ -689,18 +686,18 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
 
 	// Mark all live-out values by checking live-in of the successors.
 	// While doing so, we also update the desired register values.
-	var succ Block
+	var succ B
+	var nilBlk B
 	for i, ns := 0, blk.Succs(); i < ns; i++ {
-		succ = blk.Succ(i)
-		if succ == nil {
+		succ = f.Succ(blk, i)
+		if succ == nilBlk {
 			continue
 		}
 
 		succID := succ.ID()
 		succState := a.getOrAllocateBlockState(succID)
-		for _, v := range succState.liveIns {
-			if s.phiBlk(v) != succ {
-				st := s.getVRegState(v)
+		for _, st := range succState.liveIns {
+			if st.phiBlk() != succ {
 				st.lastUse = programCounterLiveOut
 			}
 		}
@@ -709,43 +706,33 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
 			if wazevoapi.RegAllocLoggingEnabled {
 				fmt.Printf("blk%d -> blk%d: start_regs: %s\n", bID, succID, succState.startRegs.format(a.regInfo))
 			}
-			succState.startRegs.range_(func(allocatedRealReg RealReg, vr VReg) {
-				vs := s.getVRegState(vr.ID())
+			succState.startRegs.range_(func(allocatedRealReg RealReg, vs *vrState[I, B, F]) {
 				vs.desiredLoc = newDesiredLocReg(allocatedRealReg)
-				desiredUpdated = append(desiredUpdated, vr.ID())
+				desiredUpdated = append(desiredUpdated, vs)
 			})
-			for _, p := range succ.BlockParams(&a.vs) {
+			for _, p := range f.BlockParams(succ, &a.vs) {
 				vs := s.getVRegState(p.ID())
 				if vs.desiredLoc.realReg() == RealRegInvalid {
 					vs.desiredLoc = desiredLocStack
-					desiredUpdated = append(desiredUpdated, p.ID())
+					desiredUpdated = append(desiredUpdated, vs)
 				}
 			}
 		}
 	}
 
 	// Propagate the desired register values from the end of the block to the beginning.
-	for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() {
-		if instr.IsCopy() {
-			def := instr.Defs(&a.vs)[0]
-			defState := s.getVRegState(def.ID())
-			desired := defState.desiredLoc.realReg()
-			if desired == RealRegInvalid {
-				continue
-			}
-
-			use := instr.Uses(&a.vs)[0]
-			useID := use.ID()
-			useState := s.getVRegState(useID)
-			if s.phiBlk(useID) != succ && useState.desiredLoc == desiredLocUnspecified {
-				useState.desiredLoc = newDesiredLocReg(desired)
-				desiredUpdated = append(desiredUpdated, useID)
-			}
+	for _, instr := range a.copies {
+		defState := s.getVRegState(instr.dstID)
+		desired := defState.desiredLoc.realReg()
+		useState := instr.src
+		if useState.phiBlk() != succ && useState.desiredLoc == desiredLocUnspecified {
+			useState.desiredLoc = newDesiredLocReg(desired)
+			desiredUpdated = append(desiredUpdated, useState)
 		}
 	}
 
 	pc = 0
-	for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() {
+	for instr := blk.InstrIteratorBegin(); instr != nilInstr; instr = blk.InstrIteratorNext() {
 		if wazevoapi.RegAllocLoggingEnabled {
 			fmt.Println(instr)
 		}
@@ -777,12 +764,12 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
 				r := vs.r
 
 				if r == RealRegInvalid {
-					r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[use.RegType()], currentUsedSet,
+					r = a.findOrSpillAllocatable(s, a.regInfo.AllocatableRegisters[use.RegType()], currentUsedSet,
 						// Prefer the desired register if it's available.
 						vs.desiredLoc.realReg())
 					vs.recordReload(f, blk)
 					f.ReloadRegisterBefore(use.SetRealReg(r), instr)
-					s.useRealReg(r, use)
+					s.useRealReg(r, vs)
 				}
 				if wazevoapi.RegAllocLoggingEnabled {
 					fmt.Printf("\ttrying to use v%v on %s\n", use.ID(), a.regInfo.RealRegName(r))
@@ -799,10 +786,9 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
 		}
 
 		isIndirect := instr.IsIndirectCall()
-		call := instr.IsCall() || isIndirect
-		if call {
+		if instr.IsCall() || isIndirect {
 			addr := RealRegInvalid
-			if instr.IsIndirectCall() {
+			if isIndirect {
 				addr = a.vs[0].RealReg()
 			}
 			a.releaseCallerSavedRegs(addr)
@@ -814,8 +800,8 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
 		a.reals = killSet
 
 		defs := instr.Defs(&a.vs)
-		switch {
-		case len(defs) > 1:
+		switch len(defs) {
+		default:
 			// Some instructions define multiple values on real registers.
 			// E.g. call instructions (following calling convention) / div instruction on x64 that defines both rax and rdx.
 			//
@@ -830,20 +816,21 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
 				if s.regsInUse.has(r) {
 					s.releaseRealReg(r)
 				}
-				s.useRealReg(r, def)
+				s.useRealReg(r, s.getVRegState(def.ID()))
 			}
-		case len(defs) == 1:
+		case 0:
+		case 1:
 			def := defs[0]
+			vState := s.getVRegState(def.ID())
 			if def.IsRealReg() {
 				r := def.RealReg()
 				if a.allocatableSet.has(r) {
 					if s.regsInUse.has(r) {
 						s.releaseRealReg(r)
 					}
-					s.useRealReg(r, def)
+					s.useRealReg(r, vState)
 				}
 			} else {
-				vState := s.getVRegState(def.ID())
 				r := vState.r
 
 				if desired := vState.desiredLoc.realReg(); desired != RealRegInvalid {
@@ -864,7 +851,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
 							}
 							r = desired
 							s.releaseRealReg(r)
-							s.useRealReg(r, def)
+							s.useRealReg(r, vState)
 						}
 					}
 				}
@@ -880,9 +867,9 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
 					}
 					if r == RealRegInvalid {
 						typ := def.RegType()
-						r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[typ], RegSet(0), RealRegInvalid)
+						r = a.findOrSpillAllocatable(s, a.regInfo.AllocatableRegisters[typ], RegSet(0), RealRegInvalid)
 					}
-					s.useRealReg(r, def)
+					s.useRealReg(r, vState)
 				}
 				dr := def.SetRealReg(r)
 				instr.AssignDef(dr)
@@ -915,9 +902,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
 		pc++
 	}
 
-	s.regsInUse.range_(func(allocated RealReg, v VReg) {
-		currentBlkState.endRegs.add(allocated, v)
-	})
+	s.regsInUse.range_(func(allocated RealReg, v *vrState[I, B, F]) { currentBlkState.endRegs.add(allocated, v) })
 
 	currentBlkState.visited = true
 	if wazevoapi.RegAllocLoggingEnabled {
@@ -925,31 +910,30 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
 	}
 
 	// Reset the desired end location.
-	for _, v := range desiredUpdated {
-		vs := s.getVRegState(v)
+	for _, vs := range desiredUpdated {
 		vs.desiredLoc = desiredLocUnspecified
 	}
-	a.vs2 = desiredUpdated[:0]
+	a.ss = desiredUpdated[:0]
 
 	for i := 0; i < blk.Succs(); i++ {
-		succ := blk.Succ(i)
-		if succ == nil {
+		succ := f.Succ(blk, i)
+		if succ == nilBlk {
 			continue
 		}
 		// If the successor is not visited yet, finalize the start state.
-		a.finalizeStartReg(succ)
+		a.finalizeStartReg(f, succ)
 	}
 }
 
-func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) {
+func (a *Allocator[I, B, F]) releaseCallerSavedRegs(addrReg RealReg) {
 	s := &a.state
 
 	for allocated := RealReg(0); allocated < 64; allocated++ {
 		if allocated == addrReg { // If this is the call indirect, we should not touch the addr register.
 			continue
 		}
-		if v := s.regsInUse.get(allocated); v.Valid() {
-			if v.IsRealReg() {
+		if vs := s.regsInUse.get(allocated); vs != nil {
+			if vs.v.IsRealReg() {
 				continue // This is the argument register as it's already used by VReg backed by the corresponding RealReg.
 			}
 			if !a.regInfo.CallerSavedRegisters.has(allocated) {
@@ -961,7 +945,7 @@ func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) {
 	}
 }
 
-func (a *Allocator) fixMergeState(f Function, blk Block) {
+func (a *Allocator[I, B, F]) fixMergeState(f F, blk B) {
 	preds := blk.Preds()
 	if preds <= 1 {
 		return
@@ -975,7 +959,7 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {
 	desiredOccupants := &blkSt.startRegs
 	var desiredOccupantsSet RegSet
 	for i, v := range desiredOccupants {
-		if v != VRegInvalid {
+		if v != nil {
 			desiredOccupantsSet = desiredOccupantsSet.add(RealReg(i))
 		}
 	}
@@ -992,7 +976,7 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {
 			continue
 		}
 
-		pred := blk.Pred(i)
+		pred := f.Pred(blk, i)
 		predSt := a.getOrAllocateBlockState(pred.ID())
 
 		s.resetAt(predSt)
@@ -1012,16 +996,16 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {
 
 		for r := RealReg(0); r < 64; r++ {
 			desiredVReg := desiredOccupants.get(r)
-			if !desiredVReg.Valid() {
+			if desiredVReg == nil {
 				continue
 			}
 
 			currentVReg := s.regsInUse.get(r)
-			if desiredVReg.ID() == currentVReg.ID() {
+			if currentVReg != nil && desiredVReg.v.ID() == currentVReg.v.ID() {
 				continue
 			}
 
-			typ := desiredVReg.RegType()
+			typ := desiredVReg.v.RegType()
 			var tmpRealReg VReg
 			if typ == RegTypeInt {
 				tmpRealReg = intTmp
@@ -1039,13 +1023,18 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {
 //   - desiredVReg is the desired VReg value that should be on the register `r`.
 //   - freeReg is the temporary register that can be used to swap the values, which may or may not be used.
 //   - typ is the register type of the `r`.
-func (a *Allocator) reconcileEdge(f Function,
+func (a *Allocator[I, B, F]) reconcileEdge(f F,
 	r RealReg,
-	pred Block,
-	currentVReg, desiredVReg VReg,
+	pred B,
+	currentState, desiredState *vrState[I, B, F],
 	freeReg VReg,
 	typ RegType,
 ) {
+	desiredVReg := desiredState.v
+	currentVReg := VRegInvalid
+	if currentState != nil {
+		currentVReg = currentState.v
+	}
 	// There are four cases to consider:
 	// 1. currentVReg is valid, but desiredVReg is on the stack.
 	// 2. Both currentVReg and desiredVReg are valid.
@@ -1054,7 +1043,6 @@ func (a *Allocator) reconcileEdge(f Function,
 
 	s := &a.state
 	if currentVReg.Valid() {
-		desiredState := s.getVRegState(desiredVReg.ID())
 		er := desiredState.r
 		if er == RealRegInvalid {
 			// Case 1: currentVReg is valid, but desiredVReg is on the stack.
@@ -1068,9 +1056,9 @@ func (a *Allocator) reconcileEdge(f Function,
 			f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion())
 			s.releaseRealReg(r)
 
-			s.getVRegState(desiredVReg.ID()).recordReload(f, pred)
+			desiredState.recordReload(f, pred)
 			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
-			s.useRealReg(r, desiredVReg)
+			s.useRealReg(r, desiredState)
 			return
 		} else {
 			// Case 2: Both currentVReg and desiredVReg are valid.
@@ -1089,8 +1077,8 @@ func (a *Allocator) reconcileEdge(f Function,
 			s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg())
 			s.releaseRealReg(r)
 			s.releaseRealReg(er)
-			s.useRealReg(r, desiredVReg)
-			s.useRealReg(er, currentVReg)
+			s.useRealReg(r, desiredState)
+			s.useRealReg(er, currentState)
 			if wazevoapi.RegAllocLoggingEnabled {
 				fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er))
 			}
@@ -1101,7 +1089,7 @@ func (a *Allocator) reconcileEdge(f Function,
 				desiredVReg.ID(), a.regInfo.RealRegName(r),
 			)
 		}
-		if currentReg := s.getVRegState(desiredVReg.ID()).r; currentReg != RealRegInvalid {
+		if currentReg := desiredState.r; currentReg != RealRegInvalid {
 			// Case 3: Desired is on a different register than `r` and currentReg is not valid.
 			// We simply need to move the desired value to the register.
 			f.InsertMoveBefore(
@@ -1113,14 +1101,14 @@ func (a *Allocator) reconcileEdge(f Function,
 		} else {
 			// Case 4: Both currentVReg and desiredVReg are not valid.
 			// We simply need to reload the desired value into the register.
-			s.getVRegState(desiredVReg.ID()).recordReload(f, pred)
+			desiredState.recordReload(f, pred)
 			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
 		}
-		s.useRealReg(r, desiredVReg)
+		s.useRealReg(r, desiredState)
 	}
 }
 
-func (a *Allocator) scheduleSpills(f Function) {
+func (a *Allocator[I, B, F]) scheduleSpills(f F) {
 	states := a.state.vrStates
 	for i := 0; i <= states.MaxIDEncountered(); i++ {
 		vs := states.Get(i)
@@ -1133,7 +1121,7 @@ func (a *Allocator) scheduleSpills(f Function) {
 	}
 }
 
-func (a *Allocator) scheduleSpill(f Function, vs *vrState) {
+func (a *Allocator[I, B, F]) scheduleSpill(f F, vs *vrState[I, B, F]) {
 	v := vs.v
 	// If the value is the phi value, we need to insert a spill after each phi definition.
 	if vs.isPhi {
@@ -1146,10 +1134,11 @@ func (a *Allocator) scheduleSpill(f Function, vs *vrState) {
 	pos := vs.lca
 	definingBlk := vs.defBlk
 	r := RealRegInvalid
-	if definingBlk == nil {
+	var nilBlk B
+	if definingBlk == nilBlk {
 		panic(fmt.Sprintf("BUG: definingBlk should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String()))
 	}
-	if pos == nil {
+	if pos == nilBlk {
 		panic(fmt.Sprintf("BUG: pos should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String()))
 	}
 
@@ -1159,7 +1148,7 @@ func (a *Allocator) scheduleSpill(f Function, vs *vrState) {
 	for pos != definingBlk {
 		st := a.getOrAllocateBlockState(pos.ID())
 		for rr := RealReg(0); rr < 64; rr++ {
-			if st.startRegs.get(rr) == v {
+			if vs := st.startRegs.get(rr); vs != nil && vs.v == v {
 				r = rr
 				// Already in the register, so we can place the spill at the beginning of the block.
 				break
@@ -1192,7 +1181,7 @@ func (a *Allocator) scheduleSpill(f Function, vs *vrState) {
 }
 
 // Reset resets the allocator's internal state so that it can be reused.
-func (a *Allocator) Reset() {
+func (a *Allocator[I, B, F]) Reset() {
 	a.state.reset()
 	a.blockStates.Reset()
 	a.phiDefInstListPool.Reset()
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
index 04a8e8f4d..ce84c9c0c 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
@@ -46,52 +46,50 @@ func (rs RegSet) Range(f func(allocatedRealReg RealReg)) {
 	}
 }
 
-type regInUseSet [64]VReg
+type regInUseSet[I Instr, B Block[I], F Function[I, B]] [64]*vrState[I, B, F]
 
-func newRegInUseSet() regInUseSet {
-	var ret regInUseSet
+func newRegInUseSet[I Instr, B Block[I], F Function[I, B]]() regInUseSet[I, B, F] {
+	var ret regInUseSet[I, B, F]
 	ret.reset()
 	return ret
 }
 
-func (rs *regInUseSet) reset() {
-	for i := range rs {
-		rs[i] = VRegInvalid
-	}
+func (rs *regInUseSet[I, B, F]) reset() {
+	clear(rs[:])
 }
 
-func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused
+func (rs *regInUseSet[I, B, F]) format(info *RegisterInfo) string { //nolint:unused
 	var ret []string
 	for i, vr := range rs {
-		if vr != VRegInvalid {
-			ret = append(ret, fmt.Sprintf("(%s->v%d)", info.RealRegName(RealReg(i)), vr.ID()))
+		if vr != nil {
+			ret = append(ret, fmt.Sprintf("(%s->v%d)", info.RealRegName(RealReg(i)), vr.v.ID()))
 		}
 	}
 	return strings.Join(ret, ", ")
 }
 
-func (rs *regInUseSet) has(r RealReg) bool {
-	return r < 64 && rs[r] != VRegInvalid
+func (rs *regInUseSet[I, B, F]) has(r RealReg) bool {
+	return r < 64 && rs[r] != nil
 }
 
-func (rs *regInUseSet) get(r RealReg) VReg {
+func (rs *regInUseSet[I, B, F]) get(r RealReg) *vrState[I, B, F] {
 	return rs[r]
 }
 
-func (rs *regInUseSet) remove(r RealReg) {
-	rs[r] = VRegInvalid
+func (rs *regInUseSet[I, B, F]) remove(r RealReg) {
+	rs[r] = nil
 }
 
-func (rs *regInUseSet) add(r RealReg, vr VReg) {
+func (rs *regInUseSet[I, B, F]) add(r RealReg, vr *vrState[I, B, F]) {
 	if r >= 64 {
 		return
 	}
 	rs[r] = vr
 }
 
-func (rs *regInUseSet) range_(f func(allocatedRealReg RealReg, vr VReg)) {
+func (rs *regInUseSet[I, B, F]) range_(f func(allocatedRealReg RealReg, vr *vrState[I, B, F])) {
 	for i, vr := range rs {
-		if vr != VRegInvalid {
+		if vr != nil {
 			f(RealReg(i), vr)
 		}
 	}