summaryrefslogtreecommitdiff
path: root/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc
diff options
context:
space:
mode:
authorLibravatar kim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com>2024-05-27 15:46:15 +0000
committerLibravatar GitHub <noreply@github.com>2024-05-27 17:46:15 +0200
commit1e7b32490dfdccddd04f46d4b0416b48d749d51b (patch)
tree62a11365933a5a11e0800af64cbdf9172e5e6e7a /vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc
parent[chore] Small styling + link issues (#2933) (diff)
downloadgotosocial-1e7b32490dfdccddd04f46d4b0416b48d749d51b.tar.xz
[experiment] add alternative wasm sqlite3 implementation available via build-tag (#2863)
This allows for building GoToSocial with [SQLite transpiled to WASM](https://github.com/ncruces/go-sqlite3) and accessed through [Wazero](https://wazero.io/).
Diffstat (limited to 'vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc')
-rw-r--r--vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go136
-rw-r--r--vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/reg.go123
-rw-r--r--vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go1212
-rw-r--r--vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go108
4 files changed, 1579 insertions, 0 deletions
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go
new file mode 100644
index 000000000..23157b478
--- /dev/null
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go
@@ -0,0 +1,136 @@
+package regalloc
+
+import "fmt"
+
+// These interfaces are implemented by ISA-specific backends to abstract away the details, and allow the register
+// allocators to work on any ISA.
+//
+// TODO: the interfaces are not stabilized yet, especially x64 will need some changes. E.g. x64 has an addressing mode
+// where index can be in memory. That kind of info will be useful to reduce the register pressure, and should be leveraged
+// by the register allocators, like https://docs.rs/regalloc2/latest/regalloc2/enum.OperandConstraint.html
+
+type (
+ // Function is the top-level interface to do register allocation, which corresponds to a CFG containing
+ // Blocks(s).
+ Function interface {
+ // PostOrderBlockIteratorBegin returns the first block in the post-order traversal of the CFG.
+ // In other words, the last blocks in the CFG will be returned first.
+ PostOrderBlockIteratorBegin() Block
+ // PostOrderBlockIteratorNext returns the next block in the post-order traversal of the CFG.
+ PostOrderBlockIteratorNext() Block
+ // ReversePostOrderBlockIteratorBegin returns the first block in the reverse post-order traversal of the CFG.
+ // In other words, the first blocks in the CFG will be returned first.
+ ReversePostOrderBlockIteratorBegin() Block
+ // ReversePostOrderBlockIteratorNext returns the next block in the reverse post-order traversal of the CFG.
+ ReversePostOrderBlockIteratorNext() Block
+ // ClobberedRegisters tell the clobbered registers by this function.
+ ClobberedRegisters([]VReg)
+ // LoopNestingForestRoots returns the number of roots of the loop nesting forest in a function.
+ LoopNestingForestRoots() int
+ // LoopNestingForestRoot returns the i-th root of the loop nesting forest in a function.
+ LoopNestingForestRoot(i int) Block
+ // LowestCommonAncestor returns the lowest common ancestor of two blocks in the dominator tree.
+ LowestCommonAncestor(blk1, blk2 Block) Block
+ // Idom returns the immediate dominator of the given block.
+ Idom(blk Block) Block
+
+ // Followings are for rewriting the function.
+
+ // SwapAtEndOfBlock swaps the two virtual registers at the end of the given block.
+ SwapBefore(x1, x2, tmp VReg, instr Instr)
+ // StoreRegisterBefore inserts store instruction(s) before the given instruction for the given virtual register.
+ StoreRegisterBefore(v VReg, instr Instr)
+ // StoreRegisterAfter inserts store instruction(s) after the given instruction for the given virtual register.
+ StoreRegisterAfter(v VReg, instr Instr)
+ // ReloadRegisterBefore inserts reload instruction(s) before the given instruction for the given virtual register.
+ ReloadRegisterBefore(v VReg, instr Instr)
+ // ReloadRegisterAfter inserts reload instruction(s) after the given instruction for the given virtual register.
+ ReloadRegisterAfter(v VReg, instr Instr)
+ // InsertMoveBefore inserts move instruction(s) before the given instruction for the given virtual registers.
+ InsertMoveBefore(dst, src VReg, instr Instr)
+ }
+
+ // Block is a basic block in the CFG of a function, and it consists of multiple instructions, and predecessor Block(s).
+ Block interface {
+ // ID returns the unique identifier of this block which is ordered in the reverse post-order traversal of the CFG.
+ ID() int32
+ // BlockParams returns the virtual registers used as the parameters of this block.
+ BlockParams(*[]VReg) []VReg
+ // InstrIteratorBegin returns the first instruction in this block. Instructions added after lowering must be skipped.
+ // Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr.
+ InstrIteratorBegin() Instr
+ // InstrIteratorNext returns the next instruction in this block. Instructions added after lowering must be skipped.
+ // Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr.
+ InstrIteratorNext() Instr
+ // InstrRevIteratorBegin is the same as InstrIteratorBegin, but in the reverse order.
+ InstrRevIteratorBegin() Instr
+ // InstrRevIteratorNext is the same as InstrIteratorNext, but in the reverse order.
+ InstrRevIteratorNext() Instr
+ // FirstInstr returns the fist instruction in this block where instructions will be inserted after it.
+ FirstInstr() Instr
+ // EndInstr returns the end instruction in this block.
+ EndInstr() Instr
+ // LastInstrForInsertion returns the last instruction in this block where instructions will be inserted before it.
+ // Such insertions only happen when we need to insert spill/reload instructions to adjust the merge edges.
+ // At the time of register allocation, all the critical edges are already split, so there is no need
+ // to worry about the case where branching instruction has multiple successors.
+ // Therefore, usually, it is the nop instruction, but if the block ends with an unconditional branching, then it returns
+ // the unconditional branch, not the nop. In other words it is either nop or unconditional branch.
+ LastInstrForInsertion() Instr
+ // Preds returns the number of predecessors of this block in the CFG.
+ Preds() int
+ // Pred returns the i-th predecessor of this block in the CFG.
+ Pred(i int) Block
+ // Entry returns true if the block is for the entry block.
+ Entry() bool
+ // Succs returns the number of successors of this block in the CFG.
+ Succs() int
+ // Succ returns the i-th successor of this block in the CFG.
+ Succ(i int) Block
+ // LoopHeader returns true if this block is a loop header.
+ LoopHeader() bool
+ // LoopNestingForestChildren returns the number of children of this block in the loop nesting forest.
+ LoopNestingForestChildren() int
+ // LoopNestingForestChild returns the i-th child of this block in the loop nesting forest.
+ LoopNestingForestChild(i int) Block
+ }
+
+ // Instr is an instruction in a block, abstracting away the underlying ISA.
+ Instr interface {
+ fmt.Stringer
+ // Next returns the next instruction in the same block.
+ Next() Instr
+ // Prev returns the previous instruction in the same block.
+ Prev() Instr
+ // Defs returns the virtual registers defined by this instruction.
+ Defs(*[]VReg) []VReg
+ // Uses returns the virtual registers used by this instruction.
+ // Note: multiple returned []VReg will not be held at the same time, so it's safe to use the same slice for this.
+ Uses(*[]VReg) []VReg
+ // AssignUse assigns the RealReg-allocated virtual register used by this instruction at the given index.
+ AssignUse(index int, v VReg)
+ // AssignDef assigns a RealReg-allocated virtual register defined by this instruction.
+ // This only accepts one register because we don't allocate registers for multi-def instructions (i.e. call instruction)
+ AssignDef(VReg)
+ // IsCopy returns true if this instruction is a move instruction between two registers.
+ // If true, the instruction is of the form of dst = src, and if the src and dst do not interfere with each other,
+ // we could coalesce them, and hence the copy can be eliminated from the final code.
+ IsCopy() bool
+ // IsCall returns true if this instruction is a call instruction. The result is used to insert
+ // caller saved register spills and restores.
+ IsCall() bool
+ // IsIndirectCall returns true if this instruction is an indirect call instruction which calls a function pointer.
+ // The result is used to insert caller saved register spills and restores.
+ IsIndirectCall() bool
+ // IsReturn returns true if this instruction is a return instruction.
+ IsReturn() bool
+ // AddedBeforeRegAlloc returns true if this instruction is added before register allocation.
+ AddedBeforeRegAlloc() bool
+ }
+
+ // InstrConstraint is an interface for arch-specific instruction constraints.
+ InstrConstraint interface {
+ comparable
+ Instr
+ }
+)
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/reg.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/reg.go
new file mode 100644
index 000000000..46df807e6
--- /dev/null
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/reg.go
@@ -0,0 +1,123 @@
+package regalloc
+
+import (
+ "fmt"
+
+ "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
+)
+
+// VReg represents a register which is assigned to an SSA value. This is used to represent a register in the backend.
+// A VReg may or may not be a physical register, and the info of physical register can be obtained by RealReg.
+type VReg uint64
+
+// VRegID is the lower 32bit of VReg, which is the pure identifier of VReg without RealReg info.
+type VRegID uint32
+
+// RealReg returns the RealReg of this VReg.
+func (v VReg) RealReg() RealReg {
+ return RealReg(v >> 32)
+}
+
+// IsRealReg returns true if this VReg is backed by a physical register.
+func (v VReg) IsRealReg() bool {
+ return v.RealReg() != RealRegInvalid
+}
+
+// FromRealReg returns a VReg from the given RealReg and RegType.
+// This is used to represent a specific pre-colored register in the backend.
+func FromRealReg(r RealReg, typ RegType) VReg {
+ rid := VRegID(r)
+ if rid > vRegIDReservedForRealNum {
+ panic(fmt.Sprintf("invalid real reg %d", r))
+ }
+ return VReg(r).SetRealReg(r).SetRegType(typ)
+}
+
+// SetRealReg sets the RealReg of this VReg and returns the updated VReg.
+func (v VReg) SetRealReg(r RealReg) VReg {
+ return VReg(r)<<32 | (v & 0xff_00_ffffffff)
+}
+
+// RegType returns the RegType of this VReg.
+func (v VReg) RegType() RegType {
+ return RegType(v >> 40)
+}
+
+// SetRegType sets the RegType of this VReg and returns the updated VReg.
+func (v VReg) SetRegType(t RegType) VReg {
+ return VReg(t)<<40 | (v & 0x00_ff_ffffffff)
+}
+
+// ID returns the VRegID of this VReg.
+func (v VReg) ID() VRegID {
+ return VRegID(v & 0xffffffff)
+}
+
+// Valid returns true if this VReg is Valid.
+func (v VReg) Valid() bool {
+ return v.ID() != vRegIDInvalid && v.RegType() != RegTypeInvalid
+}
+
+// RealReg represents a physical register.
+type RealReg byte
+
+const RealRegInvalid RealReg = 0
+
+const (
+ vRegIDInvalid VRegID = 1 << 31
+ VRegIDNonReservedBegin = vRegIDReservedForRealNum
+ vRegIDReservedForRealNum VRegID = 128
+ VRegInvalid = VReg(vRegIDInvalid)
+)
+
+// String implements fmt.Stringer.
+func (r RealReg) String() string {
+ switch r {
+ case RealRegInvalid:
+ return "invalid"
+ default:
+ return fmt.Sprintf("r%d", r)
+ }
+}
+
+// String implements fmt.Stringer.
+func (v VReg) String() string {
+ if v.IsRealReg() {
+ return fmt.Sprintf("r%d", v.ID())
+ }
+ return fmt.Sprintf("v%d?", v.ID())
+}
+
+// RegType represents the type of a register.
+type RegType byte
+
+const (
+ RegTypeInvalid RegType = iota
+ RegTypeInt
+ RegTypeFloat
+ NumRegType
+)
+
+// String implements fmt.Stringer.
+func (r RegType) String() string {
+ switch r {
+ case RegTypeInt:
+ return "int"
+ case RegTypeFloat:
+ return "float"
+ default:
+ return "invalid"
+ }
+}
+
+// RegTypeOf returns the RegType of the given ssa.Type.
+func RegTypeOf(p ssa.Type) RegType {
+ switch p {
+ case ssa.TypeI32, ssa.TypeI64:
+ return RegTypeInt
+ case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
+ return RegTypeFloat
+ default:
+ panic("invalid type")
+ }
+}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
new file mode 100644
index 000000000..b4450d56f
--- /dev/null
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
@@ -0,0 +1,1212 @@
+// Package regalloc performs register allocation. The algorithm can work on any ISA by implementing the interfaces in
+// api.go.
+//
+// References:
+// - https://web.stanford.edu/class/archive/cs/cs143/cs143.1128/lectures/17/Slides17.pdf
+// - https://en.wikipedia.org/wiki/Chaitin%27s_algorithm
+// - https://llvm.org/ProjectsWithLLVM/2004-Fall-CS426-LS.pdf
+// - https://pfalcon.github.io/ssabook/latest/book-full.pdf: Chapter 9. for liveness analysis.
+// - https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go
+package regalloc
+
+import (
+ "fmt"
+ "math"
+ "strings"
+
+ "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
+)
+
+// NewAllocator returns a new Allocator.
+func NewAllocator(allocatableRegs *RegisterInfo) Allocator {
+ a := Allocator{
+ regInfo: allocatableRegs,
+ phiDefInstListPool: wazevoapi.NewPool[phiDefInstList](resetPhiDefInstList),
+ blockStates: wazevoapi.NewIDedPool[blockState](resetBlockState),
+ }
+ a.state.vrStates = wazevoapi.NewIDedPool[vrState](resetVrState)
+ a.state.reset()
+ for _, regs := range allocatableRegs.AllocatableRegisters {
+ for _, r := range regs {
+ a.allocatableSet = a.allocatableSet.add(r)
+ }
+ }
+ return a
+}
+
+type (
+ // RegisterInfo holds the statically-known ISA-specific register information.
+ RegisterInfo struct {
+ // AllocatableRegisters is a 2D array of allocatable RealReg, indexed by regTypeNum and regNum.
+ // The order matters: the first element is the most preferred one when allocating.
+ AllocatableRegisters [NumRegType][]RealReg
+ CalleeSavedRegisters RegSet
+ CallerSavedRegisters RegSet
+ RealRegToVReg []VReg
+ // RealRegName returns the name of the given RealReg for debugging.
+ RealRegName func(r RealReg) string
+ RealRegType func(r RealReg) RegType
+ }
+
+ // Allocator is a register allocator.
+ Allocator struct {
+ // regInfo is static per ABI/ISA, and is initialized by the machine during Machine.PrepareRegisterAllocator.
+ regInfo *RegisterInfo
+ // allocatableSet is a set of allocatable RealReg derived from regInfo. Static per ABI/ISA.
+ allocatableSet RegSet
+ allocatedCalleeSavedRegs []VReg
+ vs []VReg
+ vs2 []VRegID
+ phiDefInstListPool wazevoapi.Pool[phiDefInstList]
+
+ // Followings are re-used during various places.
+ blks []Block
+ reals []RealReg
+ currentOccupants regInUseSet
+
+ // Following two fields are updated while iterating the blocks in the reverse postorder.
+ state state
+ blockStates wazevoapi.IDedPool[blockState]
+ }
+
+ // programCounter represents an opaque index into the program which is used to represents a LiveInterval of a VReg.
+ programCounter int32
+
+ state struct {
+ argRealRegs []VReg
+ regsInUse regInUseSet
+ vrStates wazevoapi.IDedPool[vrState]
+
+ currentBlockID int32
+
+ // allocatedRegSet is a set of RealReg that are allocated during the allocation phase. This is reset per function.
+ allocatedRegSet RegSet
+ }
+
+ blockState struct {
+ // liveIns is a list of VReg that are live at the beginning of the block.
+ liveIns []VRegID
+ // seen is true if the block is visited during the liveness analysis.
+ seen bool
+ // visited is true if the block is visited during the allocation phase.
+ visited bool
+ startFromPredIndex int
+ // startRegs is a list of RealReg that are used at the beginning of the block. This is used to fix the merge edges.
+ startRegs regInUseSet
+ // endRegs is a list of RealReg that are used at the end of the block. This is used to fix the merge edges.
+ endRegs regInUseSet
+ }
+
+ vrState struct {
+ v VReg
+ r RealReg
+ // defInstr is the instruction that defines this value. If this is the phi value and not the entry block, this is nil.
+ defInstr Instr
+ // defBlk is the block that defines this value. If this is the phi value, this is the block whose arguments contain this value.
+ defBlk Block
+ // lca = lowest common ancestor. This is the block that is the lowest common ancestor of all the blocks that
+ // reloads this value. This is used to determine the spill location. Only valid if spilled=true.
+ lca Block
+ // lastUse is the program counter of the last use of this value. This changes while iterating the block, and
+ // should not be used across the blocks as it becomes invalid. To check the validity, use lastUseUpdatedAtBlockID.
+ lastUse programCounter
+ lastUseUpdatedAtBlockID int32
+ // spilled is true if this value is spilled i.e. the value is reload from the stack somewhere in the program.
+ //
+ // Note that this field is used during liveness analysis for different purpose. This is used to determine the
+ // value is live-in or not.
+ spilled bool
+ // isPhi is true if this is a phi value.
+ isPhi bool
+ desiredLoc desiredLoc
+ // phiDefInstList is a list of instructions that defines this phi value.
+ // This is used to determine the spill location, and only valid if isPhi=true.
+ *phiDefInstList
+ }
+
+ // phiDefInstList is a linked list of instructions that defines a phi value.
+ phiDefInstList struct {
+ instr Instr
+ v VReg
+ next *phiDefInstList
+ }
+
+ // desiredLoc represents a desired location for a VReg.
+ desiredLoc uint16
+ // desiredLocKind is a kind of desired location for a VReg.
+ desiredLocKind uint16
+)
+
+const (
+ // desiredLocKindUnspecified is a kind of desired location for a VReg that is not specified.
+ desiredLocKindUnspecified desiredLocKind = iota
+ // desiredLocKindStack is a kind of desired location for a VReg that is on the stack, only used for the phi values.
+ desiredLocKindStack
+ // desiredLocKindReg is a kind of desired location for a VReg that is in a register.
+ desiredLocKindReg
+ desiredLocUnspecified = desiredLoc(desiredLocKindUnspecified)
+ desiredLocStack = desiredLoc(desiredLocKindStack)
+)
+
+func newDesiredLocReg(r RealReg) desiredLoc {
+ return desiredLoc(desiredLocKindReg) | desiredLoc(r<<2)
+}
+
+func (d desiredLoc) realReg() RealReg {
+ return RealReg(d >> 2)
+}
+
+func (d desiredLoc) stack() bool {
+ return d&3 == desiredLoc(desiredLocKindStack)
+}
+
+func resetPhiDefInstList(l *phiDefInstList) {
+ l.instr = nil
+ l.next = nil
+ l.v = VRegInvalid
+}
+
+func (s *state) dump(info *RegisterInfo) { //nolint:unused
+ fmt.Println("\t\tstate:")
+ fmt.Println("\t\t\targRealRegs:", s.argRealRegs)
+ fmt.Println("\t\t\tregsInUse", s.regsInUse.format(info))
+ fmt.Println("\t\t\tallocatedRegSet:", s.allocatedRegSet.format(info))
+ fmt.Println("\t\t\tused:", s.regsInUse.format(info))
+ var strs []string
+ for i := 0; i <= s.vrStates.MaxIDEncountered(); i++ {
+ vs := s.vrStates.Get(i)
+ if vs == nil {
+ continue
+ }
+ if vs.r != RealRegInvalid {
+ strs = append(strs, fmt.Sprintf("(v%d: %s)", vs.v.ID(), info.RealRegName(vs.r)))
+ }
+ }
+ fmt.Println("\t\t\tvrStates:", strings.Join(strs, ", "))
+}
+
+func (s *state) reset() {
+ s.argRealRegs = s.argRealRegs[:0]
+ s.vrStates.Reset()
+ s.allocatedRegSet = RegSet(0)
+ s.regsInUse.reset()
+ s.currentBlockID = -1
+}
+
+func (s *state) setVRegState(v VReg, r RealReg) {
+ id := int(v.ID())
+ st := s.vrStates.GetOrAllocate(id)
+ st.r = r
+ st.v = v
+}
+
+func resetVrState(vs *vrState) {
+ vs.v = VRegInvalid
+ vs.r = RealRegInvalid
+ vs.defInstr = nil
+ vs.defBlk = nil
+ vs.spilled = false
+ vs.lastUse = -1
+ vs.lastUseUpdatedAtBlockID = -1
+ vs.lca = nil
+ vs.isPhi = false
+ vs.phiDefInstList = nil
+ vs.desiredLoc = desiredLocUnspecified
+}
+
+func (s *state) getVRegState(v VRegID) *vrState {
+ return s.vrStates.GetOrAllocate(int(v))
+}
+
+func (s *state) useRealReg(r RealReg, v VReg) {
+ if s.regsInUse.has(r) {
+ panic("BUG: useRealReg: the given real register is already used")
+ }
+ s.regsInUse.add(r, v)
+ s.setVRegState(v, r)
+ s.allocatedRegSet = s.allocatedRegSet.add(r)
+}
+
+func (s *state) releaseRealReg(r RealReg) {
+ current := s.regsInUse.get(r)
+ if current.Valid() {
+ s.regsInUse.remove(r)
+ s.setVRegState(current, RealRegInvalid)
+ }
+}
+
+// recordReload records that the given VReg is reloaded in the given block.
+// This is used to determine the spill location by tracking the lowest common ancestor of all the blocks that reloads the value.
+func (vs *vrState) recordReload(f Function, blk Block) {
+ vs.spilled = true
+ if vs.lca == nil {
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("\t\tv%d is reloaded in blk%d,\n", vs.v.ID(), blk.ID())
+ }
+ vs.lca = blk
+ } else {
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("\t\tv%d is reloaded in blk%d, lca=%d\n", vs.v.ID(), blk.ID(), vs.lca.ID())
+ }
+ vs.lca = f.LowestCommonAncestor(vs.lca, blk)
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("updated lca=%d\n", vs.lca.ID())
+ }
+ }
+}
+
+func (s *state) findOrSpillAllocatable(a *Allocator, allocatable []RealReg, forbiddenMask RegSet, preferred RealReg) (r RealReg) {
+ r = RealRegInvalid
+ // First, check if the preferredMask has any allocatable register.
+ if preferred != RealRegInvalid && !forbiddenMask.has(preferred) && !s.regsInUse.has(preferred) {
+ for _, candidateReal := range allocatable {
+ // TODO: we should ensure the preferred register is in the allocatable set in the first place,
+ // but right now, just in case, we check it here.
+ if candidateReal == preferred {
+ return preferred
+ }
+ }
+ }
+
+ var lastUseAt programCounter
+ var spillVReg VReg
+ for _, candidateReal := range allocatable {
+ if forbiddenMask.has(candidateReal) {
+ continue
+ }
+
+ using := s.regsInUse.get(candidateReal)
+ if using == VRegInvalid {
+ // This is not used at this point.
+ return candidateReal
+ }
+
+ // Real registers in use should not be spilled, so we skip them.
+ // For example, if the register is used as an argument register, and it might be
+ // spilled and not reloaded when it ends up being used as a temporary to pass
+ // stack based argument.
+ if using.IsRealReg() {
+ continue
+ }
+
+ isPreferred := candidateReal == preferred
+
+ // last == -1 means the value won't be used anymore.
+ if last := s.getVRegState(using.ID()).lastUse; r == RealRegInvalid || isPreferred || last == -1 || (lastUseAt != -1 && last > lastUseAt) {
+ lastUseAt = last
+ r = candidateReal
+ spillVReg = using
+ if isPreferred {
+ break
+ }
+ }
+ }
+
+ if r == RealRegInvalid {
+ panic("not found any allocatable register")
+ }
+
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("\tspilling v%d when lastUseAt=%d and regsInUse=%s\n", spillVReg.ID(), lastUseAt, s.regsInUse.format(a.regInfo))
+ }
+ s.releaseRealReg(r)
+ return r
+}
+
+func (s *state) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) RealReg {
+ for _, r := range allocatable {
+ if !s.regsInUse.has(r) && !forbiddenMask.has(r) {
+ return r
+ }
+ }
+ return RealRegInvalid
+}
+
+func (s *state) resetAt(bs *blockState) {
+ s.regsInUse.range_(func(_ RealReg, vr VReg) {
+ s.setVRegState(vr, RealRegInvalid)
+ })
+ s.regsInUse.reset()
+ bs.endRegs.range_(func(r RealReg, v VReg) {
+ id := int(v.ID())
+ st := s.vrStates.GetOrAllocate(id)
+ if st.lastUseUpdatedAtBlockID == s.currentBlockID && st.lastUse == programCounterLiveIn {
+ s.regsInUse.add(r, v)
+ s.setVRegState(v, r)
+ }
+ })
+}
+
+func resetBlockState(b *blockState) {
+ b.seen = false
+ b.visited = false
+ b.endRegs.reset()
+ b.startRegs.reset()
+ b.startFromPredIndex = -1
+ b.liveIns = b.liveIns[:0]
+}
+
+func (b *blockState) dump(a *RegisterInfo) {
+ fmt.Println("\t\tblockState:")
+ fmt.Println("\t\t\tstartRegs:", b.startRegs.format(a))
+ fmt.Println("\t\t\tendRegs:", b.endRegs.format(a))
+ fmt.Println("\t\t\tstartFromPredIndex:", b.startFromPredIndex)
+ fmt.Println("\t\t\tvisited:", b.visited)
+}
+
+// DoAllocation performs register allocation on the given Function.
+func (a *Allocator) DoAllocation(f Function) {
+ a.livenessAnalysis(f)
+ a.alloc(f)
+ a.determineCalleeSavedRealRegs(f)
+}
+
+func (a *Allocator) determineCalleeSavedRealRegs(f Function) {
+ a.allocatedCalleeSavedRegs = a.allocatedCalleeSavedRegs[:0]
+ a.state.allocatedRegSet.Range(func(allocatedRealReg RealReg) {
+ if a.regInfo.CalleeSavedRegisters.has(allocatedRealReg) {
+ a.allocatedCalleeSavedRegs = append(a.allocatedCalleeSavedRegs, a.regInfo.RealRegToVReg[allocatedRealReg])
+ }
+ })
+ f.ClobberedRegisters(a.allocatedCalleeSavedRegs)
+}
+
+func (a *Allocator) getOrAllocateBlockState(blockID int32) *blockState {
+ return a.blockStates.GetOrAllocate(int(blockID))
+}
+
+// phiBlk returns the block that defines the given phi value, nil otherwise.
+func (s *state) phiBlk(v VRegID) Block {
+ vs := s.getVRegState(v)
+ if vs.isPhi {
+ return vs.defBlk
+ }
+ return nil
+}
+
+const (
+ programCounterLiveIn = math.MinInt32
+ programCounterLiveOut = math.MaxInt32
+)
+
+// liveAnalysis constructs Allocator.blockLivenessData.
+// The algorithm here is described in https://pfalcon.github.io/ssabook/latest/book-full.pdf Chapter 9.2.
+func (a *Allocator) livenessAnalysis(f Function) {
+ s := &a.state
+ for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() { // Order doesn't matter.
+
+ // We should gather phi value data.
+ for _, p := range blk.BlockParams(&a.vs) {
+ vs := s.getVRegState(p.ID())
+ vs.isPhi = true
+ vs.defBlk = blk
+ }
+ }
+
+ for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() {
+ blkID := blk.ID()
+ info := a.getOrAllocateBlockState(blkID)
+
+ a.vs2 = a.vs2[:0]
+ const (
+ flagDeleted = false
+ flagLive = true
+ )
+ ns := blk.Succs()
+ for i := 0; i < ns; i++ {
+ succ := blk.Succ(i)
+ if succ == nil {
+ continue
+ }
+
+ succID := succ.ID()
+ succInfo := a.getOrAllocateBlockState(succID)
+ if !succInfo.seen { // This means the back edge.
+ continue
+ }
+
+ for _, v := range succInfo.liveIns {
+ if s.phiBlk(v) != succ {
+ st := s.getVRegState(v)
+ // We use .spilled field to store the flag.
+ st.spilled = flagLive
+ a.vs2 = append(a.vs2, v)
+ }
+ }
+ }
+
+ for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() {
+
+ var use, def VReg
+ for _, def = range instr.Defs(&a.vs) {
+ if !def.IsRealReg() {
+ id := def.ID()
+ st := s.getVRegState(id)
+ // We use .spilled field to store the flag.
+ st.spilled = flagDeleted
+ a.vs2 = append(a.vs2, id)
+ }
+ }
+ for _, use = range instr.Uses(&a.vs) {
+ if !use.IsRealReg() {
+ id := use.ID()
+ st := s.getVRegState(id)
+ // We use .spilled field to store the flag.
+ st.spilled = flagLive
+ a.vs2 = append(a.vs2, id)
+ }
+ }
+
+ if def.Valid() && s.phiBlk(def.ID()) != nil {
+ if use.Valid() && use.IsRealReg() {
+ // If the destination is a phi value, and the source is a real register, this is the beginning of the function.
+ a.state.argRealRegs = append(a.state.argRealRegs, use)
+ }
+ }
+ }
+
+ for _, v := range a.vs2 {
+ st := s.getVRegState(v)
+ // We use .spilled field to store the flag.
+ if st.spilled == flagLive { //nolint:gosimple
+ info.liveIns = append(info.liveIns, v)
+ st.spilled = false
+ }
+ }
+
+ info.seen = true
+ }
+
+ nrs := f.LoopNestingForestRoots()
+ for i := 0; i < nrs; i++ {
+ root := f.LoopNestingForestRoot(i)
+ a.loopTreeDFS(root)
+ }
+}
+
+// loopTreeDFS implements the Algorithm 9.3 in the book in an iterative way.
+func (a *Allocator) loopTreeDFS(entry Block) {
+ a.blks = a.blks[:0]
+ a.blks = append(a.blks, entry)
+
+ s := &a.state
+ for len(a.blks) > 0 {
+ tail := len(a.blks) - 1
+ loop := a.blks[tail]
+ a.blks = a.blks[:tail]
+ a.vs2 = a.vs2[:0]
+ const (
+ flagDone = false
+ flagPending = true
+ )
+ info := a.getOrAllocateBlockState(loop.ID())
+ for _, v := range info.liveIns {
+ if s.phiBlk(v) != loop {
+ a.vs2 = append(a.vs2, v)
+ st := s.getVRegState(v)
+ // We use .spilled field to store the flag.
+ st.spilled = flagPending
+ }
+ }
+
+ var siblingAddedView []VRegID
+ cn := loop.LoopNestingForestChildren()
+ for i := 0; i < cn; i++ {
+ child := loop.LoopNestingForestChild(i)
+ childID := child.ID()
+ childInfo := a.getOrAllocateBlockState(childID)
+
+ if i == 0 {
+ begin := len(childInfo.liveIns)
+ for _, v := range a.vs2 {
+ st := s.getVRegState(v)
+ // We use .spilled field to store the flag.
+ if st.spilled == flagPending { //nolint:gosimple
+ st.spilled = flagDone
+ // TODO: deduplicate, though I don't think it has much impact.
+ childInfo.liveIns = append(childInfo.liveIns, v)
+ }
+ }
+ siblingAddedView = childInfo.liveIns[begin:]
+ } else {
+ // TODO: deduplicate, though I don't think it has much impact.
+ childInfo.liveIns = append(childInfo.liveIns, siblingAddedView...)
+ }
+
+ if child.LoopHeader() {
+ a.blks = append(a.blks, child)
+ }
+ }
+
+ if cn == 0 {
+ // If there's no forest child, we haven't cleared the .spilled field at this point.
+ for _, v := range a.vs2 {
+ st := s.getVRegState(v)
+ st.spilled = false
+ }
+ }
+ }
+}
+
+// alloc allocates registers for the given function by iterating the blocks in the reverse postorder.
+// The algorithm here is derived from the Go compiler's allocator https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go
+// In short, this is a simply linear scan register allocation where each block inherits the register allocation state from
+// one of its predecessors. Each block inherits the selected state and starts allocation from there.
+// If there's a discrepancy in the end states between predecessors, the adjustments are made to ensure consistency after allocation is done (which we call "fixing merge state").
+// The spill instructions (store into the dedicated slots) are inserted after all the allocations and fixing merge states. That is because
+// at the point, we all know where the reloads happen, and therefore we can know the best place to spill the values. More precisely,
+// the spill happens in the block that is the lowest common ancestor of all the blocks that reloads the value.
+//
+// All of these logics are almost the same as Go's compiler which has a dedicated description in the source file ^^.
+func (a *Allocator) alloc(f Function) {
+ // First we allocate each block in the reverse postorder (at least one predecessor should be allocated for each block).
+ for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() {
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("========== allocating blk%d ========\n", blk.ID())
+ }
+ if blk.Entry() {
+ a.finalizeStartReg(blk)
+ }
+ a.allocBlock(f, blk)
+ }
+ // After the allocation, we all know the start and end state of each block. So we can fix the merge states.
+ for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nil; blk = f.ReversePostOrderBlockIteratorNext() {
+ a.fixMergeState(f, blk)
+ }
+ // Finally, we insert the spill instructions as we know all the places where the reloads happen.
+ a.scheduleSpills(f)
+}
+
+func (a *Allocator) updateLiveInVRState(liveness *blockState) {
+ currentBlockID := a.state.currentBlockID
+ for _, v := range liveness.liveIns {
+ vs := a.state.getVRegState(v)
+ vs.lastUse = programCounterLiveIn
+ vs.lastUseUpdatedAtBlockID = currentBlockID
+ }
+}
+
+func (a *Allocator) finalizeStartReg(blk Block) {
+ bID := blk.ID()
+ liveness := a.getOrAllocateBlockState(bID)
+ s := &a.state
+ currentBlkState := a.getOrAllocateBlockState(bID)
+ if currentBlkState.startFromPredIndex > -1 {
+ return
+ }
+
+ s.currentBlockID = bID
+ a.updateLiveInVRState(liveness)
+
+ preds := blk.Preds()
+ var predState *blockState
+ switch preds {
+ case 0: // This is the entry block.
+ case 1:
+ predID := blk.Pred(0).ID()
+ predState = a.getOrAllocateBlockState(predID)
+ currentBlkState.startFromPredIndex = 0
+ default:
+ // TODO: there should be some better heuristic to choose the predecessor.
+ for i := 0; i < preds; i++ {
+ predID := blk.Pred(i).ID()
+ if _predState := a.getOrAllocateBlockState(predID); _predState.visited {
+ predState = _predState
+ currentBlkState.startFromPredIndex = i
+ break
+ }
+ }
+ }
+ if predState == nil {
+ if !blk.Entry() {
+ panic(fmt.Sprintf("BUG: at lease one predecessor should be visited for blk%d", blk.ID()))
+ }
+ for _, u := range s.argRealRegs {
+ s.useRealReg(u.RealReg(), u)
+ }
+ currentBlkState.startFromPredIndex = 0
+ } else if predState != nil {
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("allocating blk%d starting from blk%d (on index=%d) \n",
+ bID, blk.Pred(currentBlkState.startFromPredIndex).ID(), currentBlkState.startFromPredIndex)
+ }
+ s.resetAt(predState)
+ }
+
+ s.regsInUse.range_(func(allocated RealReg, v VReg) {
+ currentBlkState.startRegs.add(allocated, v)
+ })
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("finalized start reg for blk%d: %s\n", blk.ID(), currentBlkState.startRegs.format(a.regInfo))
+ }
+}
+
+func (a *Allocator) allocBlock(f Function, blk Block) {
+ bID := blk.ID()
+ s := &a.state
+ currentBlkState := a.getOrAllocateBlockState(bID)
+ s.currentBlockID = bID
+
+ if currentBlkState.startFromPredIndex < 0 {
+ panic("BUG: startFromPredIndex should be set in finalizeStartReg prior to allocBlock")
+ }
+
+ // Clears the previous state.
+ s.regsInUse.range_(func(allocatedRealReg RealReg, vr VReg) {
+ s.setVRegState(vr, RealRegInvalid)
+ })
+ s.regsInUse.reset()
+ // Then set the start state.
+ currentBlkState.startRegs.range_(func(allocatedRealReg RealReg, vr VReg) {
+ s.useRealReg(allocatedRealReg, vr)
+ })
+
+ desiredUpdated := a.vs2[:0]
+
+ // Update the last use of each VReg.
+ var pc programCounter
+ for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() {
+ var use, def VReg
+ for _, use = range instr.Uses(&a.vs) {
+ if !use.IsRealReg() {
+ s.getVRegState(use.ID()).lastUse = pc
+ }
+ }
+
+ if instr.IsCopy() {
+ def = instr.Defs(&a.vs)[0]
+ r := def.RealReg()
+ if r != RealRegInvalid {
+ useID := use.ID()
+ vs := s.getVRegState(useID)
+ if !vs.isPhi { // TODO: no idea why do we need this.
+ vs.desiredLoc = newDesiredLocReg(r)
+ desiredUpdated = append(desiredUpdated, useID)
+ }
+ }
+ }
+ pc++
+ }
+
+ // Mark all live-out values by checking live-in of the successors.
+ // While doing so, we also update the desired register values.
+ var succ Block
+ for i, ns := 0, blk.Succs(); i < ns; i++ {
+ succ = blk.Succ(i)
+ if succ == nil {
+ continue
+ }
+
+ succID := succ.ID()
+ succState := a.getOrAllocateBlockState(succID)
+ for _, v := range succState.liveIns {
+ if s.phiBlk(v) != succ {
+ st := s.getVRegState(v)
+ st.lastUse = programCounterLiveOut
+ }
+ }
+
+ if succState.startFromPredIndex > -1 {
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("blk%d -> blk%d: start_regs: %s\n", bID, succID, succState.startRegs.format(a.regInfo))
+ }
+ succState.startRegs.range_(func(allocatedRealReg RealReg, vr VReg) {
+ vs := s.getVRegState(vr.ID())
+ vs.desiredLoc = newDesiredLocReg(allocatedRealReg)
+ desiredUpdated = append(desiredUpdated, vr.ID())
+ })
+ for _, p := range succ.BlockParams(&a.vs) {
+ vs := s.getVRegState(p.ID())
+ if vs.desiredLoc.realReg() == RealRegInvalid {
+ vs.desiredLoc = desiredLocStack
+ desiredUpdated = append(desiredUpdated, p.ID())
+ }
+ }
+ }
+ }
+
+ // Propagate the desired register values from the end of the block to the beginning.
+ for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() {
+ if instr.IsCopy() {
+ def := instr.Defs(&a.vs)[0]
+ defState := s.getVRegState(def.ID())
+ desired := defState.desiredLoc.realReg()
+ if desired == RealRegInvalid {
+ continue
+ }
+
+ use := instr.Uses(&a.vs)[0]
+ useID := use.ID()
+ useState := s.getVRegState(useID)
+ if s.phiBlk(useID) != succ && useState.desiredLoc == desiredLocUnspecified {
+ useState.desiredLoc = newDesiredLocReg(desired)
+ desiredUpdated = append(desiredUpdated, useID)
+ }
+ }
+ }
+
+ pc = 0
+ for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() {
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Println(instr)
+ }
+
+ var currentUsedSet RegSet
+ killSet := a.reals[:0]
+
+ // Gather the set of registers that will be used in the current instruction.
+ for _, use := range instr.Uses(&a.vs) {
+ if use.IsRealReg() {
+ r := use.RealReg()
+ currentUsedSet = currentUsedSet.add(r)
+ if a.allocatableSet.has(r) {
+ killSet = append(killSet, r)
+ }
+ } else {
+ vs := s.getVRegState(use.ID())
+ if r := vs.r; r != RealRegInvalid {
+ currentUsedSet = currentUsedSet.add(r)
+ }
+ }
+ }
+
+ for i, use := range instr.Uses(&a.vs) {
+ if !use.IsRealReg() {
+ vs := s.getVRegState(use.ID())
+ killed := vs.lastUse == pc
+ r := vs.r
+
+ if r == RealRegInvalid {
+ r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[use.RegType()], currentUsedSet,
+ // Prefer the desired register if it's available.
+ vs.desiredLoc.realReg())
+ vs.recordReload(f, blk)
+ f.ReloadRegisterBefore(use.SetRealReg(r), instr)
+ s.useRealReg(r, use)
+ }
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("\ttrying to use v%v on %s\n", use.ID(), a.regInfo.RealRegName(r))
+ }
+ instr.AssignUse(i, use.SetRealReg(r))
+ currentUsedSet = currentUsedSet.add(r)
+ if killed {
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("\tkill v%d with %s\n", use.ID(), a.regInfo.RealRegName(r))
+ }
+ killSet = append(killSet, r)
+ }
+ }
+ }
+
+ isIndirect := instr.IsIndirectCall()
+ call := instr.IsCall() || isIndirect
+ if call {
+ addr := RealRegInvalid
+ if instr.IsIndirectCall() {
+ addr = a.vs[0].RealReg()
+ }
+ a.releaseCallerSavedRegs(addr)
+ }
+
+ for _, r := range killSet {
+ s.releaseRealReg(r)
+ }
+ a.reals = killSet
+
+ defs := instr.Defs(&a.vs)
+ switch {
+ case len(defs) > 1:
+ // Some instructions define multiple values on real registers.
+ // E.g. call instructions (following calling convention) / div instruction on x64 that defines both rax and rdx.
+ //
+ // Note that currently I assume that such instructions define only the pre colored real registers, not the VRegs
+ // that require allocations. If we need to support such case, we need to add the logic to handle it here,
+ // though is there any such instruction?
+ for _, def := range defs {
+ if !def.IsRealReg() {
+ panic("BUG: multiple defs should be on real registers")
+ }
+ r := def.RealReg()
+ if s.regsInUse.has(r) {
+ s.releaseRealReg(r)
+ }
+ s.useRealReg(r, def)
+ }
+ case len(defs) == 1:
+ def := defs[0]
+ if def.IsRealReg() {
+ r := def.RealReg()
+ if a.allocatableSet.has(r) {
+ if s.regsInUse.has(r) {
+ s.releaseRealReg(r)
+ }
+ s.useRealReg(r, def)
+ }
+ } else {
+ vState := s.getVRegState(def.ID())
+ r := vState.r
+
+ if desired := vState.desiredLoc.realReg(); desired != RealRegInvalid {
+ if r != desired {
+ if (vState.isPhi && vState.defBlk == succ) ||
+ // If this is not a phi and it's already assigned a real reg,
+ // this value has multiple definitions, hence we cannot assign the desired register.
+ (!s.regsInUse.has(desired) && r == RealRegInvalid) {
+ // If the phi value is passed via a real register, we force the value to be in the desired register.
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("\t\tv%d is phi and desiredReg=%s\n", def.ID(), a.regInfo.RealRegName(desired))
+ }
+ if r != RealRegInvalid {
+ // If the value is already in a different real register, we release it to change the state.
+ // Otherwise, multiple registers might have the same values at the end, which results in
+ // messing up the merge state reconciliation.
+ s.releaseRealReg(r)
+ }
+ r = desired
+ s.releaseRealReg(r)
+ s.useRealReg(r, def)
+ }
+ }
+ }
+
+ // Allocate a new real register if `def` is not currently assigned one.
+ // It can happen when multiple instructions define the same VReg (e.g. const loads).
+ if r == RealRegInvalid {
+ if instr.IsCopy() {
+ copySrc := instr.Uses(&a.vs)[0].RealReg()
+ if a.allocatableSet.has(copySrc) && !s.regsInUse.has(copySrc) {
+ r = copySrc
+ }
+ }
+ if r == RealRegInvalid {
+ typ := def.RegType()
+ r = s.findOrSpillAllocatable(a, a.regInfo.AllocatableRegisters[typ], RegSet(0), RealRegInvalid)
+ }
+ s.useRealReg(r, def)
+ }
+ dr := def.SetRealReg(r)
+ instr.AssignDef(dr)
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("\tdefining v%d with %s\n", def.ID(), a.regInfo.RealRegName(r))
+ }
+ if vState.isPhi {
+ if vState.desiredLoc.stack() { // Stack based phi value.
+ f.StoreRegisterAfter(dr, instr)
+ // Release the real register as it's not used anymore.
+ s.releaseRealReg(r)
+ } else {
+ // Only the register based phis are necessary to track the defining instructions
+ // since the stack-based phis are already having stores inserted ^.
+ n := a.phiDefInstListPool.Allocate()
+ n.instr = instr
+ n.next = vState.phiDefInstList
+ n.v = dr
+ vState.phiDefInstList = n
+ }
+ } else {
+ vState.defInstr = instr
+ vState.defBlk = blk
+ }
+ }
+ }
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Println(instr)
+ }
+ pc++
+ }
+
+ s.regsInUse.range_(func(allocated RealReg, v VReg) {
+ currentBlkState.endRegs.add(allocated, v)
+ })
+
+ currentBlkState.visited = true
+ if wazevoapi.RegAllocLoggingEnabled {
+ currentBlkState.dump(a.regInfo)
+ }
+
+ // Reset the desired end location.
+ for _, v := range desiredUpdated {
+ vs := s.getVRegState(v)
+ vs.desiredLoc = desiredLocUnspecified
+ }
+ a.vs2 = desiredUpdated[:0]
+
+ for i := 0; i < blk.Succs(); i++ {
+ succ := blk.Succ(i)
+ if succ == nil {
+ continue
+ }
+ // If the successor is not visited yet, finalize the start state.
+ a.finalizeStartReg(succ)
+ }
+}
+
+func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) {
+ s := &a.state
+
+ for i := 0; i < 64; i++ {
+ allocated := RealReg(i)
+ if allocated == addrReg { // If this is the call indirect, we should not touch the addr register.
+ continue
+ }
+ if v := s.regsInUse.get(allocated); v.Valid() {
+ if v.IsRealReg() {
+ continue // This is the argument register as it's already used by VReg backed by the corresponding RealReg.
+ }
+ if !a.regInfo.CallerSavedRegisters.has(allocated) {
+ // If this is not a caller-saved register, it is safe to keep it across the call.
+ continue
+ }
+ s.releaseRealReg(allocated)
+ }
+ }
+}
+
+func (a *Allocator) fixMergeState(f Function, blk Block) {
+ preds := blk.Preds()
+ if preds <= 1 {
+ return
+ }
+
+ s := &a.state
+
+ // Restores the state at the beginning of the block.
+ bID := blk.ID()
+ blkSt := a.getOrAllocateBlockState(bID)
+ desiredOccupants := &blkSt.startRegs
+ aliveOnRegVRegs := make(map[VReg]RealReg)
+ for i := 0; i < 64; i++ {
+ r := RealReg(i)
+ if v := blkSt.startRegs.get(r); v.Valid() {
+ aliveOnRegVRegs[v] = r
+ }
+ }
+
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Println("fixMergeState", blk.ID(), ":", desiredOccupants.format(a.regInfo))
+ }
+
+ s.currentBlockID = bID
+ a.updateLiveInVRState(a.getOrAllocateBlockState(bID))
+
+ currentOccupants := &a.currentOccupants
+ for i := 0; i < preds; i++ {
+ currentOccupants.reset()
+ if i == blkSt.startFromPredIndex {
+ continue
+ }
+
+ currentOccupantsRev := make(map[VReg]RealReg)
+ pred := blk.Pred(i)
+ predSt := a.getOrAllocateBlockState(pred.ID())
+ for ii := 0; ii < 64; ii++ {
+ r := RealReg(ii)
+ if v := predSt.endRegs.get(r); v.Valid() {
+ if _, ok := aliveOnRegVRegs[v]; !ok {
+ continue
+ }
+ currentOccupants.add(r, v)
+ currentOccupantsRev[v] = r
+ }
+ }
+
+ s.resetAt(predSt)
+
+ // Finds the free registers if any.
+ intTmp, floatTmp := VRegInvalid, VRegInvalid
+ if intFree := s.findAllocatable(
+ a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupants.set,
+ ); intFree != RealRegInvalid {
+ intTmp = FromRealReg(intFree, RegTypeInt)
+ }
+ if floatFree := s.findAllocatable(
+ a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupants.set,
+ ); floatFree != RealRegInvalid {
+ floatTmp = FromRealReg(floatFree, RegTypeFloat)
+ }
+
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo))
+ }
+
+ for ii := 0; ii < 64; ii++ {
+ r := RealReg(ii)
+ desiredVReg := desiredOccupants.get(r)
+ if !desiredVReg.Valid() {
+ continue
+ }
+
+ currentVReg := currentOccupants.get(r)
+ if desiredVReg.ID() == currentVReg.ID() {
+ continue
+ }
+
+ typ := desiredVReg.RegType()
+ var tmpRealReg VReg
+ if typ == RegTypeInt {
+ tmpRealReg = intTmp
+ } else {
+ tmpRealReg = floatTmp
+ }
+ a.reconcileEdge(f, r, pred, currentOccupants, currentOccupantsRev, currentVReg, desiredVReg, tmpRealReg, typ)
+ }
+ }
+}
+
+func (a *Allocator) reconcileEdge(f Function,
+ r RealReg,
+ pred Block,
+ currentOccupants *regInUseSet,
+ currentOccupantsRev map[VReg]RealReg,
+ currentVReg, desiredVReg VReg,
+ freeReg VReg,
+ typ RegType,
+) {
+ s := &a.state
+ if currentVReg.Valid() {
+ // Both are on reg.
+ er, ok := currentOccupantsRev[desiredVReg]
+ if !ok {
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n",
+ desiredVReg.ID(), a.regInfo.RealRegName(r),
+ )
+ }
+ // This case is that the desired value is on the stack, but currentVReg is on the target register.
+ // We need to move the current value to the stack, and reload the desired value.
+ // TODO: we can do better here.
+ f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion())
+ delete(currentOccupantsRev, currentVReg)
+
+ s.getVRegState(desiredVReg.ID()).recordReload(f, pred)
+ f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
+ currentOccupants.add(r, desiredVReg)
+ currentOccupantsRev[desiredVReg] = r
+ return
+ }
+
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n",
+ desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er),
+ )
+ }
+ f.SwapBefore(
+ currentVReg.SetRealReg(r),
+ desiredVReg.SetRealReg(er),
+ freeReg,
+ pred.LastInstrForInsertion(),
+ )
+ s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg())
+ currentOccupantsRev[desiredVReg] = r
+ currentOccupantsRev[currentVReg] = er
+ currentOccupants.add(r, desiredVReg)
+ currentOccupants.add(er, currentVReg)
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er))
+ }
+ } else {
+ // Desired is on reg, but currently the target register is not used.
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("\t\tv%d is desired to be on %s, current not used\n",
+ desiredVReg.ID(), a.regInfo.RealRegName(r),
+ )
+ }
+ if currentReg, ok := currentOccupantsRev[desiredVReg]; ok {
+ f.InsertMoveBefore(
+ FromRealReg(r, typ),
+ desiredVReg.SetRealReg(currentReg),
+ pred.LastInstrForInsertion(),
+ )
+ currentOccupants.remove(currentReg)
+ } else {
+ s.getVRegState(desiredVReg.ID()).recordReload(f, pred)
+ f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
+ }
+ currentOccupantsRev[desiredVReg] = r
+ currentOccupants.add(r, desiredVReg)
+ }
+
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo))
+ }
+}
+
+func (a *Allocator) scheduleSpills(f Function) {
+ states := a.state.vrStates
+ for i := 0; i <= states.MaxIDEncountered(); i++ {
+ vs := states.Get(i)
+ if vs == nil {
+ continue
+ }
+ if vs.spilled {
+ a.scheduleSpill(f, vs)
+ }
+ }
+}
+
+func (a *Allocator) scheduleSpill(f Function, vs *vrState) {
+ v := vs.v
+ // If the value is the phi value, we need to insert a spill after each phi definition.
+ if vs.isPhi {
+ for defInstr := vs.phiDefInstList; defInstr != nil; defInstr = defInstr.next {
+ f.StoreRegisterAfter(defInstr.v, defInstr.instr)
+ }
+ return
+ }
+
+ pos := vs.lca
+ definingBlk := vs.defBlk
+ r := RealRegInvalid
+ if definingBlk == nil {
+ panic(fmt.Sprintf("BUG: definingBlk should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String()))
+ }
+ if pos == nil {
+ panic(fmt.Sprintf("BUG: pos should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String()))
+ }
+
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("v%d is spilled in blk%d, lca=blk%d\n", v.ID(), definingBlk.ID(), pos.ID())
+ }
+ for pos != definingBlk {
+ st := a.getOrAllocateBlockState(pos.ID())
+ for ii := 0; ii < 64; ii++ {
+ rr := RealReg(ii)
+ if st.startRegs.get(rr) == v {
+ r = rr
+ // Already in the register, so we can place the spill at the beginning of the block.
+ break
+ }
+ }
+
+ if r != RealRegInvalid {
+ break
+ }
+
+ pos = f.Idom(pos)
+ }
+
+ if pos == definingBlk {
+ defInstr := vs.defInstr
+ defInstr.Defs(&a.vs)
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("schedule spill v%d after %v\n", v.ID(), defInstr)
+ }
+ f.StoreRegisterAfter(a.vs[0], defInstr)
+ } else {
+ // Found an ancestor block that holds the value in the register at the beginning of the block.
+ // We need to insert a spill before the last use.
+ first := pos.FirstInstr()
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("schedule spill v%d before %v\n", v.ID(), first)
+ }
+ f.StoreRegisterAfter(v.SetRealReg(r), first)
+ }
+}
+
+// Reset resets the allocator's internal state so that it can be reused.
+func (a *Allocator) Reset() {
+ a.state.reset()
+ a.blockStates.Reset()
+ a.phiDefInstListPool.Reset()
+ a.vs = a.vs[:0]
+}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
new file mode 100644
index 000000000..e9bf60661
--- /dev/null
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
@@ -0,0 +1,108 @@
+package regalloc
+
+import (
+ "fmt"
+ "strings"
+)
+
+// NewRegSet returns a new RegSet with the given registers.
+func NewRegSet(regs ...RealReg) RegSet {
+ var ret RegSet
+ for _, r := range regs {
+ ret = ret.add(r)
+ }
+ return ret
+}
+
+// RegSet represents a set of registers.
+type RegSet uint64
+
+func (rs RegSet) format(info *RegisterInfo) string { //nolint:unused
+ var ret []string
+ for i := 0; i < 64; i++ {
+ if rs&(1<<uint(i)) != 0 {
+ ret = append(ret, info.RealRegName(RealReg(i)))
+ }
+ }
+ return strings.Join(ret, ", ")
+}
+
+func (rs RegSet) has(r RealReg) bool {
+ return rs&(1<<uint(r)) != 0
+}
+
+func (rs RegSet) add(r RealReg) RegSet {
+ if r >= 64 {
+ return rs
+ }
+ return rs | 1<<uint(r)
+}
+
+func (rs RegSet) Range(f func(allocatedRealReg RealReg)) {
+ for i := 0; i < 64; i++ {
+ if rs&(1<<uint(i)) != 0 {
+ f(RealReg(i))
+ }
+ }
+}
+
+type regInUseSet struct {
+ set RegSet
+ vrs [64]VReg
+}
+
+func (rs *regInUseSet) reset() {
+ rs.set = 0
+ for i := range rs.vrs {
+ rs.vrs[i] = VRegInvalid
+ }
+}
+
+func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused
+ var ret []string
+ for i := 0; i < 64; i++ {
+ if rs.set&(1<<uint(i)) != 0 {
+ vr := rs.vrs[i]
+ ret = append(ret, fmt.Sprintf("(%s->v%d)", info.RealRegName(RealReg(i)), vr.ID()))
+ }
+ }
+ return strings.Join(ret, ", ")
+}
+
+func (rs *regInUseSet) has(r RealReg) bool {
+ if r >= 64 {
+ return false
+ }
+ return rs.set&(1<<uint(r)) != 0
+}
+
+func (rs *regInUseSet) get(r RealReg) VReg {
+ if r >= 64 {
+ return VRegInvalid
+ }
+ return rs.vrs[r]
+}
+
+func (rs *regInUseSet) remove(r RealReg) {
+ if r >= 64 {
+ return
+ }
+ rs.set &= ^(1 << uint(r))
+ rs.vrs[r] = VRegInvalid
+}
+
+func (rs *regInUseSet) add(r RealReg, vr VReg) {
+ if r >= 64 {
+ return
+ }
+ rs.set |= 1 << uint(r)
+ rs.vrs[r] = vr
+}
+
+func (rs *regInUseSet) range_(f func(allocatedRealReg RealReg, vr VReg)) {
+ for i := 0; i < 64; i++ {
+ if rs.set&(1<<uint(i)) != 0 {
+ f(RealReg(i), rs.vrs[i])
+ }
+ }
+}