Diffstat (limited to 'vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa')
21 files changed, 990 insertions, 480 deletions
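Orientation for the hunks below: the amd64 and arm64 backends stop routing label and instruction-list bookkeeping through the shared backend.ExecutableContextT and keep it on the machine struct instead, using a package-local label type and a labelPosition struct that also implements regalloc.Block. The following is a condensed, illustrative sketch of the new amd64 declarations as they appear in the machine.go hunk further down; the comments paraphrase the ones in the diff and it is not a drop-in copy.

// label is a position in the generated code, exactly like a traditional
// assembly label. Labels up to maxSSABlockID coincide with ssa.BasicBlockIDs,
// which keeps disassembly easy to correlate with the SSA form; the return
// block gets a sentinel value.
type label uint32

const labelReturn = math.MaxUint32

func ssaBlockLabel(sb ssa.BasicBlock) label {
	if sb.ReturnBlock() {
		return labelReturn
	}
	return label(sb.ID())
}

// labelPosition delimits the instruction region a label covers and implements
// regalloc.Block, replacing the indirection through backend.LabelPosition.
type labelPosition struct {
	sb           ssa.BasicBlock // non-nil when the label corresponds to an ssa.BasicBlock
	cur          *instruction   // cursor used while the register allocator walks the block
	begin, end   *instruction   // first and last instructions of the region
	binaryOffset int64          // offset of the label in the emitted machine code
}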
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go index 751050aff..96f035e58 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go @@ -14,7 +14,6 @@ var calleeSavedVRegs = []regalloc.VReg{  // CompileGoFunctionTrampoline implements backend.Machine.  func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte { -	ectx := m.ectx  	argBegin := 1 // Skips exec context by default.  	if needModuleContextPtr {  		argBegin++ @@ -25,7 +24,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *  	m.currentABI = abi  	cur := m.allocateNop() -	ectx.RootInstr = cur +	m.rootInstr = cur  	// Execution context is always the first argument.  	execCtrPtr := raxVReg @@ -272,7 +271,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *  	cur = m.revertRBPRSP(cur)  	linkInstr(cur, m.allocateInstr().asRet()) -	m.encodeWithoutSSA(ectx.RootInstr) +	m.encodeWithoutSSA(m.rootInstr)  	return m.c.Buf()  } @@ -347,10 +346,8 @@ var stackGrowSaveVRegs = []regalloc.VReg{  // CompileStackGrowCallSequence implements backend.Machine.  func (m *machine) CompileStackGrowCallSequence() []byte { -	ectx := m.ectx -  	cur := m.allocateNop() -	ectx.RootInstr = cur +	m.rootInstr = cur  	cur = m.setupRBPRSP(cur) @@ -379,7 +376,7 @@ func (m *machine) CompileStackGrowCallSequence() []byte {  	cur = m.revertRBPRSP(cur)  	linkInstr(cur, m.allocateInstr().asRet()) -	m.encodeWithoutSSA(ectx.RootInstr) +	m.encodeWithoutSSA(m.rootInstr)  	return m.c.Buf()  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go index d27e79c0e..6a3e58f51 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go @@ -17,16 +17,6 @@ type instruction struct {  	kind                instructionKind  } -// Next implements regalloc.Instr. -func (i *instruction) Next() regalloc.Instr { -	return i.next -} - -// Prev implements regalloc.Instr. -func (i *instruction) Prev() regalloc.Instr { -	return i.prev -} -  // IsCall implements regalloc.Instr.  func (i *instruction) IsCall() bool { return i.kind == call } @@ -36,9 +26,6 @@ func (i *instruction) IsIndirectCall() bool { return i.kind == callIndirect }  // IsReturn implements regalloc.Instr.  func (i *instruction) IsReturn() bool { return i.kind == ret } -// AddedBeforeRegAlloc implements regalloc.Instr. -func (i *instruction) AddedBeforeRegAlloc() bool { return i.addedBeforeRegAlloc } -  // String implements regalloc.Instr.  
func (i *instruction) String() string {  	switch i.kind { @@ -651,26 +638,14 @@ func resetInstruction(i *instruction) {  	*i = instruction{}  } -func setNext(i *instruction, next *instruction) { -	i.next = next -} - -func setPrev(i *instruction, prev *instruction) { -	i.prev = prev -} - -func asNop(i *instruction) { -	i.kind = nop0 -} - -func (i *instruction) asNop0WithLabel(label backend.Label) *instruction { //nolint +func (i *instruction) asNop0WithLabel(label label) *instruction { //nolint  	i.kind = nop0  	i.u1 = uint64(label)  	return i  } -func (i *instruction) nop0Label() backend.Label { -	return backend.Label(i.u1) +func (i *instruction) nop0Label() label { +	return label(i.u1)  }  type instructionKind byte @@ -1161,7 +1136,7 @@ func (i *instruction) asJmp(target operand) *instruction {  	return i  } -func (i *instruction) jmpLabel() backend.Label { +func (i *instruction) jmpLabel() label {  	switch i.kind {  	case jmp, jmpIf, lea, xmmUnaryRmR:  		return i.op1.label() diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go index bee673d25..befe8c643 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go @@ -130,9 +130,9 @@ func (m *machine) lowerAddendsToAmode(x, y addend, offBase uint32) *amode {  	}  } -func (m *machine) lowerAddend(x *backend.SSAValueDefinition) addend { -	if x.IsFromBlockParam() { -		return addend{x.BlkParamVReg, 0, 0} +func (m *machine) lowerAddend(x backend.SSAValueDefinition) addend { +	if !x.IsFromInstr() { +		return addend{m.c.VRegOf(x.V), 0, 0}  	}  	// Ensure the addend is not referenced in multiple places; we will discard nested Iadds.  	op := m.c.MatchInstrOneOf(x, addendsMatchOpcodes[:]) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go index 61ae6f406..aeeb6b645 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go @@ -16,18 +16,13 @@ import (  // NewBackend returns a new backend for arm64.  
func NewBackend() backend.Machine { -	ectx := backend.NewExecutableContextT[instruction]( -		resetInstruction, -		setNext, -		setPrev, -		asNop, -	) -	return &machine{ -		ectx:                                ectx, +	m := &machine{  		cpuFeatures:                         platform.CpuFeatures, -		regAlloc:                            regalloc.NewAllocator(regInfo), +		regAlloc:                            regalloc.NewAllocator[*instruction, *labelPosition, *regAllocFn](regInfo),  		spillSlots:                          map[regalloc.VRegID]int64{},  		amodePool:                           wazevoapi.NewPool[amode](nil), +		labelPositionPool:                   wazevoapi.NewIDedPool[labelPosition](resetLabelPosition), +		instrPool:                           wazevoapi.NewPool[instruction](resetInstruction),  		constSwizzleMaskConstIndex:          -1,  		constSqmulRoundSatIndex:             -1,  		constI8x16SHLMaskTableIndex:         -1, @@ -41,23 +36,46 @@ func NewBackend() backend.Machine {  		constExtAddPairwiseI16x8uMask1Index: -1,  		constExtAddPairwiseI16x8uMask2Index: -1,  	} +	m.regAllocFn.m = m +	return m  }  type (  	// machine implements backend.Machine for amd64.  	machine struct {  		c                        backend.Compiler -		ectx                     *backend.ExecutableContextT[instruction]  		stackBoundsCheckDisabled bool +		instrPool wazevoapi.Pool[instruction]  		amodePool wazevoapi.Pool[amode]  		cpuFeatures platform.CpuFeatureFlags -		regAlloc        regalloc.Allocator -		regAllocFn      *backend.RegAllocFunction[*instruction, *machine] +		regAlloc        regalloc.Allocator[*instruction, *labelPosition, *regAllocFn] +		regAllocFn      regAllocFn  		regAllocStarted bool +		// labelPositionPool is the pool of labelPosition. The id is the label where +		// if the label is less than the maxSSABlockID, it's the ssa.BasicBlockID. +		labelPositionPool wazevoapi.IDedPool[labelPosition] +		// nextLabel is the next label to be allocated. The first free label comes after maxSSABlockID +		// so that we can have an identical label for the SSA block ID, which is useful for debugging. +		nextLabel label +		// rootInstr is the first instruction of the function. +		rootInstr *instruction +		// currentLabelPos is the currently-compiled ssa.BasicBlock's labelPosition. +		currentLabelPos *labelPosition +		// orderedSSABlockLabelPos is the ordered list of labelPosition in the generated code for each ssa.BasicBlock. +		orderedSSABlockLabelPos []*labelPosition +		// returnLabelPos is the labelPosition for the return block. +		returnLabelPos labelPosition +		// perBlockHead and perBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock. +		perBlockHead, perBlockEnd *instruction +		// pendingInstructions are the instructions which are not yet emitted into the instruction list. +		pendingInstructions []*instruction +		// maxSSABlockID is the maximum ssa.BasicBlockID in the current function. +		maxSSABlockID label +  		spillSlotSize int64  		spillSlots    map[regalloc.VRegID]int64  		currentABI    *backend.FunctionABI @@ -67,8 +85,11 @@ type (  		labelResolutionPends []labelResolutionPend +		// jmpTableTargets holds the labels of the jump table targets.  		jmpTableTargets [][]uint32 -		consts          []_const +		// jmpTableTargetNext is the index to the jmpTableTargets slice to be used for the next jump table. 
+		jmpTableTargetsNext int +		consts              []_const  		constSwizzleMaskConstIndex, constSqmulRoundSatIndex,  		constI8x16SHLMaskTableIndex, constI8x16LogicalSHRMaskTableIndex, @@ -79,9 +100,10 @@ type (  	}  	_const struct { -		lo, hi uint64 -		_var   []byte -		label  *labelPosition +		lo, hi   uint64 +		_var     []byte +		label    label +		labelPos *labelPosition  	}  	labelResolutionPend struct { @@ -90,22 +112,73 @@ type (  		// imm32Offset is the offset of the last 4 bytes of the instruction.  		imm32Offset int64  	} +) -	labelPosition = backend.LabelPosition[instruction] +type ( +	// label represents a position in the generated code which is either +	// a real instruction or the constant InstructionPool (e.g. jump tables). +	// +	// This is exactly the same as the traditional "label" in assembly code. +	label uint32 + +	// labelPosition represents the regions of the generated code which the label represents. +	// This implements regalloc.Block. +	labelPosition struct { +		// sb is not nil if this corresponds to a ssa.BasicBlock. +		sb ssa.BasicBlock +		// cur is used to walk through the instructions in the block during the register allocation. +		cur, +		// begin and end are the first and last instructions of the block. +		begin, end *instruction +		// binaryOffset is the offset in the binary where the label is located. +		binaryOffset int64 +	}  ) -func (m *machine) getOrAllocateConstLabel(i *int, _var []byte) backend.Label { +// String implements backend.Machine. +func (l label) String() string { +	return fmt.Sprintf("L%d", l) +} + +func resetLabelPosition(l *labelPosition) { +	*l = labelPosition{} +} + +const labelReturn = math.MaxUint32 + +func ssaBlockLabel(sb ssa.BasicBlock) label { +	if sb.ReturnBlock() { +		return labelReturn +	} +	return label(sb.ID()) +} + +// getOrAllocateSSABlockLabelPosition returns the labelPosition for the given basic block. +func (m *machine) getOrAllocateSSABlockLabelPosition(sb ssa.BasicBlock) *labelPosition { +	if sb.ReturnBlock() { +		m.returnLabelPos.sb = sb +		return &m.returnLabelPos +	} + +	l := ssaBlockLabel(sb) +	pos := m.labelPositionPool.GetOrAllocate(int(l)) +	pos.sb = sb +	return pos +} + +func (m *machine) getOrAllocateConstLabel(i *int, _var []byte) label {  	index := *i  	if index == -1 { -		label := m.allocateLabel() +		l, pos := m.allocateLabel()  		index = len(m.consts)  		m.consts = append(m.consts, _const{ -			_var:  _var, -			label: label, +			_var:     _var, +			label:    l, +			labelPos: pos,  		})  		*i = index  	} -	return m.consts[index].label.L +	return m.consts[index].label  }  // Reset implements backend.Machine. @@ -120,18 +193,20 @@ func (m *machine) Reset() {  	}  	m.stackBoundsCheckDisabled = false -	m.ectx.Reset() - -	m.regAllocFn.Reset()  	m.regAlloc.Reset() +	m.labelPositionPool.Reset() +	m.instrPool.Reset()  	m.regAllocStarted = false  	m.clobberedRegs = m.clobberedRegs[:0]  	m.spillSlotSize = 0  	m.maxRequiredStackSizeForCalls = 0 +	m.perBlockHead, m.perBlockEnd, m.rootInstr = nil, nil, nil +	m.pendingInstructions = m.pendingInstructions[:0] +	m.orderedSSABlockLabelPos = m.orderedSSABlockLabelPos[:0]  	m.amodePool.Reset() -	m.jmpTableTargets = m.jmpTableTargets[:0] +	m.jmpTableTargetsNext = 0  	m.constSwizzleMaskConstIndex = -1  	m.constSqmulRoundSatIndex = -1  	m.constI8x16SHLMaskTableIndex = -1 @@ -146,8 +221,63 @@ func (m *machine) Reset() {  	m.constExtAddPairwiseI16x8uMask2Index = -1  } -// ExecutableContext implements backend.Machine. 
-func (m *machine) ExecutableContext() backend.ExecutableContext { return m.ectx } +// StartLoweringFunction implements backend.Machine StartLoweringFunction. +func (m *machine) StartLoweringFunction(maxBlockID ssa.BasicBlockID) { +	m.maxSSABlockID = label(maxBlockID) +	m.nextLabel = label(maxBlockID) + 1 +} + +// LinkAdjacentBlocks implements backend.Machine. +func (m *machine) LinkAdjacentBlocks(prev, next ssa.BasicBlock) { +	prevPos, nextPos := m.getOrAllocateSSABlockLabelPosition(prev), m.getOrAllocateSSABlockLabelPosition(next) +	prevPos.end.next = nextPos.begin +} + +// StartBlock implements backend.Machine. +func (m *machine) StartBlock(blk ssa.BasicBlock) { +	m.currentLabelPos = m.getOrAllocateSSABlockLabelPosition(blk) +	labelPos := m.currentLabelPos +	end := m.allocateNop() +	m.perBlockHead, m.perBlockEnd = end, end +	labelPos.begin, labelPos.end = end, end +	m.orderedSSABlockLabelPos = append(m.orderedSSABlockLabelPos, labelPos) +} + +// EndBlock implements ExecutableContext. +func (m *machine) EndBlock() { +	// Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions. +	m.insertAtPerBlockHead(m.allocateNop()) + +	m.currentLabelPos.begin = m.perBlockHead + +	if m.currentLabelPos.sb.EntryBlock() { +		m.rootInstr = m.perBlockHead +	} +} + +func (m *machine) insertAtPerBlockHead(i *instruction) { +	if m.perBlockHead == nil { +		m.perBlockHead = i +		m.perBlockEnd = i +		return +	} + +	i.next = m.perBlockHead +	m.perBlockHead.prev = i +	m.perBlockHead = i +} + +// FlushPendingInstructions implements backend.Machine. +func (m *machine) FlushPendingInstructions() { +	l := len(m.pendingInstructions) +	if l == 0 { +		return +	} +	for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order. +		m.insertAtPerBlockHead(m.pendingInstructions[i]) +	} +	m.pendingInstructions = m.pendingInstructions[:0] +}  // DisableStackCheck implements backend.Machine.  func (m *machine) DisableStackCheck() { m.stackBoundsCheckDisabled = true } @@ -155,23 +285,17 @@ func (m *machine) DisableStackCheck() { m.stackBoundsCheckDisabled = true }  // SetCompiler implements backend.Machine.  func (m *machine) SetCompiler(c backend.Compiler) {  	m.c = c -	m.regAllocFn = backend.NewRegAllocFunction[*instruction, *machine](m, c.SSABuilder(), c) +	m.regAllocFn.ssaB = c.SSABuilder()  }  // SetCurrentABI implements backend.Machine. -func (m *machine) SetCurrentABI(abi *backend.FunctionABI) { -	m.currentABI = abi -} +func (m *machine) SetCurrentABI(abi *backend.FunctionABI) { m.currentABI = abi }  // RegAlloc implements backend.Machine.  func (m *machine) RegAlloc() {  	rf := m.regAllocFn -	for _, pos := range m.ectx.OrderedBlockLabels { -		rf.AddBlock(pos.SB, pos.L, pos.Begin, pos.End) -	} -  	m.regAllocStarted = true -	m.regAlloc.DoAllocation(rf) +	m.regAlloc.DoAllocation(&rf)  	// Now that we know the final spill slot size, we must align spillSlotSize to 16 bytes.  	m.spillSlotSize = (m.spillSlotSize + 15) &^ 15  } @@ -184,49 +308,54 @@ func (m *machine) InsertReturn() {  // LowerSingleBranch implements backend.Machine.  
func (m *machine) LowerSingleBranch(b *ssa.Instruction) { -	ectx := m.ectx  	switch b.Opcode() {  	case ssa.OpcodeJump: -		_, _, targetBlk := b.BranchData() +		_, _, targetBlkID := b.BranchData()  		if b.IsFallthroughJump() {  			return  		}  		jmp := m.allocateInstr() -		target := ectx.GetOrAllocateSSABlockLabel(targetBlk) -		if target == backend.LabelReturn { +		target := ssaBlockLabel(m.c.SSABuilder().BasicBlock(targetBlkID)) +		if target == labelReturn {  			jmp.asRet()  		} else {  			jmp.asJmp(newOperandLabel(target))  		}  		m.insert(jmp)  	case ssa.OpcodeBrTable: -		index, target := b.BrTableData() -		m.lowerBrTable(index, target) +		index, targetBlkIDs := b.BrTableData() +		m.lowerBrTable(index, targetBlkIDs)  	default:  		panic("BUG: unexpected branch opcode" + b.Opcode().String())  	}  } -func (m *machine) addJmpTableTarget(targets []ssa.BasicBlock) (index int) { -	// TODO: reuse the slice! -	labels := make([]uint32, len(targets)) -	for j, target := range targets { -		labels[j] = uint32(m.ectx.GetOrAllocateSSABlockLabel(target)) +func (m *machine) addJmpTableTarget(targets ssa.Values) (index int) { +	if m.jmpTableTargetsNext == len(m.jmpTableTargets) { +		m.jmpTableTargets = append(m.jmpTableTargets, make([]uint32, 0, len(targets.View()))) +	} + +	index = m.jmpTableTargetsNext +	m.jmpTableTargetsNext++ +	m.jmpTableTargets[index] = m.jmpTableTargets[index][:0] +	for _, targetBlockID := range targets.View() { +		target := m.c.SSABuilder().BasicBlock(ssa.BasicBlockID(targetBlockID)) +		m.jmpTableTargets[index] = append(m.jmpTableTargets[index], uint32(ssaBlockLabel(target)))  	} -	index = len(m.jmpTableTargets) -	m.jmpTableTargets = append(m.jmpTableTargets, labels)  	return  }  var condBranchMatches = [...]ssa.Opcode{ssa.OpcodeIcmp, ssa.OpcodeFcmp} -func (m *machine) lowerBrTable(index ssa.Value, targets []ssa.BasicBlock) { +func (m *machine) lowerBrTable(index ssa.Value, targets ssa.Values) {  	_v := m.getOperand_Reg(m.c.ValueDefinition(index))  	v := m.copyToTmp(_v.reg()) +	targetCount := len(targets.View()) +  	// First, we need to do the bounds check.  	maxIndex := m.c.AllocateVReg(ssa.TypeI32) -	m.lowerIconst(maxIndex, uint64(len(targets)-1), false) +	m.lowerIconst(maxIndex, uint64(targetCount-1), false)  	cmp := m.allocateInstr().asCmpRmiR(true, newOperandReg(maxIndex), v, false)  	m.insert(cmp) @@ -255,23 +384,22 @@ func (m *machine) lowerBrTable(index ssa.Value, targets []ssa.BasicBlock) {  	jmpTable := m.allocateInstr()  	targetSliceIndex := m.addJmpTableTarget(targets) -	jmpTable.asJmpTableSequence(targetSliceIndex, len(targets)) +	jmpTable.asJmpTableSequence(targetSliceIndex, targetCount)  	m.insert(jmpTable)  }  // LowerConditionalBranch implements backend.Machine.  
func (m *machine) LowerConditionalBranch(b *ssa.Instruction) { -	exctx := m.ectx -	cval, args, targetBlk := b.BranchData() +	cval, args, targetBlkID := b.BranchData()  	if len(args) > 0 {  		panic(fmt.Sprintf(  			"conditional branch shouldn't have args; likely a bug in critical edge splitting: from %s to %s", -			exctx.CurrentSSABlk, -			targetBlk, +			m.currentLabelPos.sb, +			targetBlkID,  		))  	} -	target := exctx.GetOrAllocateSSABlockLabel(targetBlk) +	target := ssaBlockLabel(m.c.SSABuilder().BasicBlock(targetBlkID))  	cvalDef := m.c.ValueDefinition(cval)  	switch m.c.MatchInstrOneOf(cvalDef, condBranchMatches[:]) { @@ -1272,9 +1400,9 @@ func (m *machine) lowerVconst(dst regalloc.VReg, lo, hi uint64) {  	}  	load := m.allocateInstr() -	constLabel := m.allocateLabel() -	m.consts = append(m.consts, _const{label: constLabel, lo: lo, hi: hi}) -	load.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(constLabel.L)), dst) +	l, pos := m.allocateLabel() +	m.consts = append(m.consts, _const{label: l, labelPos: pos, lo: lo, hi: hi}) +	load.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(l)), dst)  	m.insert(load)  } @@ -1473,21 +1601,24 @@ func (m *machine) lowerExitIfTrueWithCode(execCtx regalloc.VReg, cond ssa.Value,  	jmpIf.asJmpIf(condFromSSAIntCmpCond(c).invert(), newOperandLabel(l))  } -func (m *machine) tryLowerBandToFlag(x, y *backend.SSAValueDefinition) (ok bool) { -	var target *backend.SSAValueDefinition +func (m *machine) tryLowerBandToFlag(x, y backend.SSAValueDefinition) (ok bool) { +	var target backend.SSAValueDefinition +	var got bool  	if x.IsFromInstr() && x.Instr.Constant() && x.Instr.ConstantVal() == 0 {  		if m.c.MatchInstr(y, ssa.OpcodeBand) {  			target = y +			got = true  		}  	}  	if y.IsFromInstr() && y.Instr.Constant() && y.Instr.ConstantVal() == 0 {  		if m.c.MatchInstr(x, ssa.OpcodeBand) {  			target = x +			got = true  		}  	} -	if target == nil { +	if !got {  		return false  	} @@ -1522,7 +1653,7 @@ func (m *machine) allocateExitInstructions(execCtx, exitCodeReg regalloc.VReg) (  	return  } -func (m *machine) lowerExitWithCode(execCtx regalloc.VReg, code wazevoapi.ExitCode) (afterLabel backend.Label) { +func (m *machine) lowerExitWithCode(execCtx regalloc.VReg, code wazevoapi.ExitCode) (afterLabel label) {  	exitCodeReg := rbpVReg  	saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtx, exitCodeReg) @@ -1819,9 +1950,9 @@ func (m *machine) lowerCall(si *ssa.Instruction) {  // callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the  // caller side of the function call. -func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, stackSlotSize int64) { +func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def backend.SSAValueDefinition, stackSlotSize int64) {  	arg := &a.Args[argIndex] -	if def != nil && def.IsFromInstr() { +	if def.IsFromInstr() {  		// Constant instructions are inlined.  		if inst := def.Instr; inst.Constant() {  			m.insertLoadConstant(inst, reg) @@ -1904,25 +2035,20 @@ func (m *machine) InsertMove(dst, src regalloc.VReg, typ ssa.Type) {  // Format implements backend.Machine.  
func (m *machine) Format() string { -	ectx := m.ectx -	begins := map[*instruction]backend.Label{} -	for _, pos := range ectx.LabelPositions { +	begins := map[*instruction]label{} +	for l := label(0); l < m.nextLabel; l++ { +		pos := m.labelPositionPool.Get(int(l))  		if pos != nil { -			begins[pos.Begin] = pos.L +			begins[pos.begin] = l  		}  	} -	irBlocks := map[backend.Label]ssa.BasicBlockID{} -	for i, l := range ectx.SsaBlockIDToLabels { -		irBlocks[l] = ssa.BasicBlockID(i) -	} -  	var lines []string -	for cur := ectx.RootInstr; cur != nil; cur = cur.next { +	for cur := m.rootInstr; cur != nil; cur = cur.next {  		if l, ok := begins[cur]; ok {  			var labelStr string -			if blkID, ok := irBlocks[l]; ok { -				labelStr = fmt.Sprintf("%s (SSA Block: %s):", l, blkID) +			if l <= m.maxSSABlockID { +				labelStr = fmt.Sprintf("%s (SSA Block: blk%d):", l, l)  			} else {  				labelStr = fmt.Sprintf("%s:", l)  			} @@ -1935,9 +2061,9 @@ func (m *machine) Format() string {  	}  	for _, vc := range m.consts {  		if vc._var == nil { -			lines = append(lines, fmt.Sprintf("%s: const [%d %d]", vc.label.L, vc.lo, vc.hi)) +			lines = append(lines, fmt.Sprintf("%s: const [%d %d]", vc.label, vc.lo, vc.hi))  		} else { -			lines = append(lines, fmt.Sprintf("%s: const %#x", vc.label.L, vc._var)) +			lines = append(lines, fmt.Sprintf("%s: const %#x", vc.label, vc._var))  		}  	}  	return "\n" + strings.Join(lines, "\n") + "\n" @@ -1945,18 +2071,14 @@ func (m *machine) Format() string {  func (m *machine) encodeWithoutSSA(root *instruction) {  	m.labelResolutionPends = m.labelResolutionPends[:0] -	ectx := m.ectx -  	bufPtr := m.c.BufPtr()  	for cur := root; cur != nil; cur = cur.next {  		offset := int64(len(*bufPtr))  		if cur.kind == nop0 {  			l := cur.nop0Label() -			if int(l) >= len(ectx.LabelPositions) { -				continue -			} -			if pos := ectx.LabelPositions[l]; pos != nil { -				pos.BinaryOffset = offset +			pos := m.labelPositionPool.Get(int(l)) +			if pos != nil { +				pos.binaryOffset = offset  			}  		} @@ -1973,7 +2095,7 @@ func (m *machine) encodeWithoutSSA(root *instruction) {  		switch p.instr.kind {  		case jmp, jmpIf, lea:  			target := p.instr.jmpLabel() -			targetOffset := ectx.LabelPositions[target].BinaryOffset +			targetOffset := m.labelPositionPool.Get(int(target)).binaryOffset  			imm32Offset := p.imm32Offset  			jmpOffset := int32(targetOffset - (p.imm32Offset + 4)) // +4 because RIP points to the next instruction.  			binary.LittleEndian.PutUint32((*bufPtr)[imm32Offset:], uint32(jmpOffset)) @@ -1985,33 +2107,33 @@ func (m *machine) encodeWithoutSSA(root *instruction) {  // Encode implements backend.Machine Encode.  
func (m *machine) Encode(ctx context.Context) (err error) { -	ectx := m.ectx  	bufPtr := m.c.BufPtr()  	var fn string  	var fnIndex int -	var labelToSSABlockID map[backend.Label]ssa.BasicBlockID +	var labelPosToLabel map[*labelPosition]label  	if wazevoapi.PerfMapEnabled {  		fn = wazevoapi.GetCurrentFunctionName(ctx) -		labelToSSABlockID = make(map[backend.Label]ssa.BasicBlockID) -		for i, l := range ectx.SsaBlockIDToLabels { -			labelToSSABlockID[l] = ssa.BasicBlockID(i) +		labelPosToLabel = make(map[*labelPosition]label) +		for i := 0; i <= m.labelPositionPool.MaxIDEncountered(); i++ { +			pos := m.labelPositionPool.Get(i) +			labelPosToLabel[pos] = label(i)  		}  		fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx)  	}  	m.labelResolutionPends = m.labelResolutionPends[:0] -	for _, pos := range ectx.OrderedBlockLabels { +	for _, pos := range m.orderedSSABlockLabelPos {  		offset := int64(len(*bufPtr)) -		pos.BinaryOffset = offset -		for cur := pos.Begin; cur != pos.End.next; cur = cur.next { +		pos.binaryOffset = offset +		for cur := pos.begin; cur != pos.end.next; cur = cur.next {  			offset := int64(len(*bufPtr))  			switch cur.kind {  			case nop0:  				l := cur.nop0Label() -				if pos := ectx.LabelPositions[l]; pos != nil { -					pos.BinaryOffset = offset +				if pos := m.labelPositionPool.Get(int(l)); pos != nil { +					pos.binaryOffset = offset  				}  			case sourceOffsetInfo:  				m.c.AddSourceOffsetInfo(offset, cur.sourceOffsetInfo()) @@ -2026,22 +2148,16 @@ func (m *machine) Encode(ctx context.Context) (err error) {  		}  		if wazevoapi.PerfMapEnabled { -			l := pos.L -			var labelStr string -			if blkID, ok := labelToSSABlockID[l]; ok { -				labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID) -			} else { -				labelStr = l.String() -			} +			l := labelPosToLabel[pos]  			size := int64(len(*bufPtr)) - offset -			wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr)) +			wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, l))  		}  	}  	for i := range m.consts {  		offset := int64(len(*bufPtr))  		vc := &m.consts[i] -		vc.label.BinaryOffset = offset +		vc.labelPos.binaryOffset = offset  		if vc._var == nil {  			lo, hi := vc.lo, vc.hi  			m.c.Emit8Bytes(lo) @@ -2059,7 +2175,7 @@ func (m *machine) Encode(ctx context.Context) (err error) {  		switch p.instr.kind {  		case jmp, jmpIf, lea, xmmUnaryRmR:  			target := p.instr.jmpLabel() -			targetOffset := ectx.LabelPositions[target].BinaryOffset +			targetOffset := m.labelPositionPool.Get(int(target)).binaryOffset  			imm32Offset := p.imm32Offset  			jmpOffset := int32(targetOffset - (p.imm32Offset + 4)) // +4 because RIP points to the next instruction.  			binary.LittleEndian.PutUint32(buf[imm32Offset:], uint32(jmpOffset)) @@ -2068,7 +2184,7 @@ func (m *machine) Encode(ctx context.Context) (err error) {  			// Each entry is the offset from the beginning of the jmpTableIsland instruction in 8 bytes.  			targets := m.jmpTableTargets[p.instr.u1]  			for i, l := range targets { -				targetOffset := ectx.LabelPositions[backend.Label(l)].BinaryOffset +				targetOffset := m.labelPositionPool.Get(int(l)).binaryOffset  				jmpOffset := targetOffset - tableBegin  				binary.LittleEndian.PutUint64(buf[tableBegin+int64(i)*8:], uint64(jmpOffset))  			} @@ -2097,7 +2213,7 @@ func (m *machine) ResolveRelocations(refToBinaryOffset []int, binary []byte, rel  // CallTrampolineIslandInfo implements backend.Machine CallTrampolineIslandInfo.  
func (m *machine) CallTrampolineIslandInfo(_ int) (_, _ int, _ error) { return } -func (m *machine) lowerIcmpToFlag(xd, yd *backend.SSAValueDefinition, _64 bool) { +func (m *machine) lowerIcmpToFlag(xd, yd backend.SSAValueDefinition, _64 bool) {  	x := m.getOperand_Reg(xd)  	y := m.getOperand_Mem_Imm32_Reg(yd)  	cmp := m.allocateInstr().asCmpRmiR(true, y, x.reg(), _64) @@ -2140,7 +2256,7 @@ func (m *machine) lowerFcmpToFlags(instr *ssa.Instruction) (f1, f2 cond, and boo  // allocateInstr allocates an instruction.  func (m *machine) allocateInstr() *instruction { -	instr := m.ectx.InstructionPool.Allocate() +	instr := m.instrPool.Allocate()  	if !m.regAllocStarted {  		instr.addedBeforeRegAlloc = true  	} @@ -2154,24 +2270,22 @@ func (m *machine) allocateNop() *instruction {  }  func (m *machine) insert(i *instruction) { -	ectx := m.ectx -	ectx.PendingInstructions = append(ectx.PendingInstructions, i) +	m.pendingInstructions = append(m.pendingInstructions, i)  } -func (m *machine) allocateBrTarget() (nop *instruction, l backend.Label) { //nolint -	pos := m.allocateLabel() -	l = pos.L +func (m *machine) allocateBrTarget() (nop *instruction, l label) { //nolint +	l, pos := m.allocateLabel()  	nop = m.allocateInstr()  	nop.asNop0WithLabel(l) -	pos.Begin, pos.End = nop, nop +	pos.begin, pos.end = nop, nop  	return  } -func (m *machine) allocateLabel() *labelPosition { -	ectx := m.ectx -	l := ectx.AllocateLabel() -	pos := ectx.GetOrAllocateLabelPosition(l) -	return pos +func (m *machine) allocateLabel() (label, *labelPosition) { +	l := m.nextLabel +	pos := m.labelPositionPool.GetOrAllocate(int(l)) +	m.nextLabel++ +	return l, pos  }  func (m *machine) getVRegSpillSlotOffsetFromSP(id regalloc.VRegID, size byte) int64 { @@ -3185,22 +3299,22 @@ func (m *machine) lowerShuffle(x, y ssa.Value, lo, hi uint64, ret ssa.Value) {  		}  	} -	xmaskLabel := m.allocateLabel() -	m.consts = append(m.consts, _const{lo: xMask[0], hi: xMask[1], label: xmaskLabel}) -	ymaskLabel := m.allocateLabel() -	m.consts = append(m.consts, _const{lo: yMask[0], hi: yMask[1], label: ymaskLabel}) +	xl, xmaskPos := m.allocateLabel() +	m.consts = append(m.consts, _const{lo: xMask[0], hi: xMask[1], label: xl, labelPos: xmaskPos}) +	yl, ymaskPos := m.allocateLabel() +	m.consts = append(m.consts, _const{lo: yMask[0], hi: yMask[1], label: yl, labelPos: ymaskPos})  	xx, yy := m.getOperand_Reg(m.c.ValueDefinition(x)), m.getOperand_Reg(m.c.ValueDefinition(y))  	tmpX, tmpY := m.copyToTmp(xx.reg()), m.copyToTmp(yy.reg())  	// Apply mask to X.  	tmp := m.c.AllocateVReg(ssa.TypeV128) -	loadMaskLo := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(xmaskLabel.L)), tmp) +	loadMaskLo := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(xl)), tmp)  	m.insert(loadMaskLo)  	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmp), tmpX))  	// Apply mask to Y. 
-	loadMaskHi := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(ymaskLabel.L)), tmp) +	loadMaskHi := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(yl)), tmp)  	m.insert(loadMaskHi)  	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmp), tmpY)) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go index 8fa974c66..e53729860 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go @@ -12,7 +12,7 @@ func (m *machine) PostRegAlloc() {  }  func (m *machine) setupPrologue() { -	cur := m.ectx.RootInstr +	cur := m.rootInstr  	prevInitInst := cur.next  	// At this point, we have the stack layout as follows: @@ -130,14 +130,13 @@ func (m *machine) setupPrologue() {  // 3. Inserts the dec/inc RSP instruction right before/after the call instruction.  // 4. Lowering that is supposed to be done after regalloc.  func (m *machine) postRegAlloc() { -	ectx := m.ectx -	for cur := ectx.RootInstr; cur != nil; cur = cur.next { +	for cur := m.rootInstr; cur != nil; cur = cur.next {  		switch k := cur.kind; k {  		case ret:  			m.setupEpilogueAfter(cur.prev)  			continue  		case fcvtToSintSequence, fcvtToUintSequence: -			m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] +			m.pendingInstructions = m.pendingInstructions[:0]  			if k == fcvtToSintSequence {  				m.lowerFcvtToSintSequenceAfterRegalloc(cur)  			} else { @@ -146,29 +145,29 @@ func (m *machine) postRegAlloc() {  			prev := cur.prev  			next := cur.next  			cur := prev -			for _, instr := range m.ectx.PendingInstructions { +			for _, instr := range m.pendingInstructions {  				cur = linkInstr(cur, instr)  			}  			linkInstr(cur, next)  			continue  		case xmmCMov: -			m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] +			m.pendingInstructions = m.pendingInstructions[:0]  			m.lowerXmmCmovAfterRegAlloc(cur)  			prev := cur.prev  			next := cur.next  			cur := prev -			for _, instr := range m.ectx.PendingInstructions { +			for _, instr := range m.pendingInstructions {  				cur = linkInstr(cur, instr)  			}  			linkInstr(cur, next)  			continue  		case idivRemSequence: -			m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] +			m.pendingInstructions = m.pendingInstructions[:0]  			m.lowerIDivRemSequenceAfterRegAlloc(cur)  			prev := cur.prev  			next := cur.next  			cur := prev -			for _, instr := range m.ectx.PendingInstructions { +			for _, instr := range m.pendingInstructions {  				cur = linkInstr(cur, instr)  			}  			linkInstr(cur, next) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go index 0bb28ee9e..de9dcc944 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go @@ -1,13 +1,226 @@  package amd64  import ( -	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"  	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"  	
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"  ) -// InsertMoveBefore implements backend.RegAllocFunctionMachine. -func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { +// regAllocFn implements regalloc.Function. +type regAllocFn struct { +	ssaB                   ssa.Builder +	m                      *machine +	loopNestingForestRoots []ssa.BasicBlock +	blockIter              int +} + +// PostOrderBlockIteratorBegin implements regalloc.Function. +func (f *regAllocFn) PostOrderBlockIteratorBegin() *labelPosition { +	f.blockIter = len(f.m.orderedSSABlockLabelPos) - 1 +	return f.PostOrderBlockIteratorNext() +} + +// PostOrderBlockIteratorNext implements regalloc.Function. +func (f *regAllocFn) PostOrderBlockIteratorNext() *labelPosition { +	if f.blockIter < 0 { +		return nil +	} +	b := f.m.orderedSSABlockLabelPos[f.blockIter] +	f.blockIter-- +	return b +} + +// ReversePostOrderBlockIteratorBegin implements regalloc.Function. +func (f *regAllocFn) ReversePostOrderBlockIteratorBegin() *labelPosition { +	f.blockIter = 0 +	return f.ReversePostOrderBlockIteratorNext() +} + +// ReversePostOrderBlockIteratorNext implements regalloc.Function. +func (f *regAllocFn) ReversePostOrderBlockIteratorNext() *labelPosition { +	if f.blockIter >= len(f.m.orderedSSABlockLabelPos) { +		return nil +	} +	b := f.m.orderedSSABlockLabelPos[f.blockIter] +	f.blockIter++ +	return b +} + +// ClobberedRegisters implements regalloc.Function. +func (f *regAllocFn) ClobberedRegisters(regs []regalloc.VReg) { +	f.m.clobberedRegs = append(f.m.clobberedRegs[:0], regs...) +} + +// LoopNestingForestRoots implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestRoots() int { +	f.loopNestingForestRoots = f.ssaB.LoopNestingForestRoots() +	return len(f.loopNestingForestRoots) +} + +// LoopNestingForestRoot implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestRoot(i int) *labelPosition { +	root := f.loopNestingForestRoots[i] +	pos := f.m.getOrAllocateSSABlockLabelPosition(root) +	return pos +} + +// LowestCommonAncestor implements regalloc.Function. +func (f *regAllocFn) LowestCommonAncestor(blk1, blk2 *labelPosition) *labelPosition { +	sb := f.ssaB.LowestCommonAncestor(blk1.sb, blk2.sb) +	pos := f.m.getOrAllocateSSABlockLabelPosition(sb) +	return pos +} + +// Idom implements regalloc.Function. +func (f *regAllocFn) Idom(blk *labelPosition) *labelPosition { +	sb := f.ssaB.Idom(blk.sb) +	pos := f.m.getOrAllocateSSABlockLabelPosition(sb) +	return pos +} + +// SwapBefore implements regalloc.Function. +func (f *regAllocFn) SwapBefore(x1, x2, tmp regalloc.VReg, instr *instruction) { +	f.m.swap(instr.prev, x1, x2, tmp) +} + +// StoreRegisterBefore implements regalloc.Function. +func (f *regAllocFn) StoreRegisterBefore(v regalloc.VReg, instr *instruction) { +	m := f.m +	m.insertStoreRegisterAt(v, instr, false) +} + +// StoreRegisterAfter implements regalloc.Function. +func (f *regAllocFn) StoreRegisterAfter(v regalloc.VReg, instr *instruction) { +	m := f.m +	m.insertStoreRegisterAt(v, instr, true) +} + +// ReloadRegisterBefore implements regalloc.Function. +func (f *regAllocFn) ReloadRegisterBefore(v regalloc.VReg, instr *instruction) { +	m := f.m +	m.insertReloadRegisterAt(v, instr, false) +} + +// ReloadRegisterAfter implements regalloc.Function. +func (f *regAllocFn) ReloadRegisterAfter(v regalloc.VReg, instr *instruction) { +	m := f.m +	m.insertReloadRegisterAt(v, instr, true) +} + +// InsertMoveBefore implements regalloc.Function. 
+func (f *regAllocFn) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { +	f.m.insertMoveBefore(dst, src, instr) +} + +// LoopNestingForestChild implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestChild(pos *labelPosition, i int) *labelPosition { +	childSB := pos.sb.LoopNestingForestChildren()[i] +	return f.m.getOrAllocateSSABlockLabelPosition(childSB) +} + +// Succ implements regalloc.Block. +func (f *regAllocFn) Succ(pos *labelPosition, i int) *labelPosition { +	succSB := pos.sb.Succ(i) +	if succSB.ReturnBlock() { +		return nil +	} +	return f.m.getOrAllocateSSABlockLabelPosition(succSB) +} + +// Pred implements regalloc.Block. +func (f *regAllocFn) Pred(pos *labelPosition, i int) *labelPosition { +	predSB := pos.sb.Pred(i) +	return f.m.getOrAllocateSSABlockLabelPosition(predSB) +} + +// BlockParams implements regalloc.Function. +func (f *regAllocFn) BlockParams(pos *labelPosition, regs *[]regalloc.VReg) []regalloc.VReg { +	c := f.m.c +	*regs = (*regs)[:0] +	for i := 0; i < pos.sb.Params(); i++ { +		v := c.VRegOf(pos.sb.Param(i)) +		*regs = append(*regs, v) +	} +	return *regs +} + +// ID implements regalloc.Block. +func (pos *labelPosition) ID() int32 { +	return int32(pos.sb.ID()) +} + +// InstrIteratorBegin implements regalloc.Block. +func (pos *labelPosition) InstrIteratorBegin() *instruction { +	ret := pos.begin +	pos.cur = ret +	return ret +} + +// InstrIteratorNext implements regalloc.Block. +func (pos *labelPosition) InstrIteratorNext() *instruction { +	for { +		if pos.cur == pos.end { +			return nil +		} +		instr := pos.cur.next +		pos.cur = instr +		if instr == nil { +			return nil +		} else if instr.addedBeforeRegAlloc { +			// Only concerned about the instruction added before regalloc. +			return instr +		} +	} +} + +// InstrRevIteratorBegin implements regalloc.Block. +func (pos *labelPosition) InstrRevIteratorBegin() *instruction { +	pos.cur = pos.end +	return pos.cur +} + +// InstrRevIteratorNext implements regalloc.Block. +func (pos *labelPosition) InstrRevIteratorNext() *instruction { +	for { +		if pos.cur == pos.begin { +			return nil +		} +		instr := pos.cur.prev +		pos.cur = instr +		if instr == nil { +			return nil +		} else if instr.addedBeforeRegAlloc { +			// Only concerned about the instruction added before regalloc. +			return instr +		} +	} +} + +// FirstInstr implements regalloc.Block. +func (pos *labelPosition) FirstInstr() *instruction { return pos.begin } + +// LastInstrForInsertion implements regalloc.Block. +func (pos *labelPosition) LastInstrForInsertion() *instruction { +	return lastInstrForInsertion(pos.begin, pos.end) +} + +// Preds implements regalloc.Block. +func (pos *labelPosition) Preds() int { return pos.sb.Preds() } + +// Entry implements regalloc.Block. +func (pos *labelPosition) Entry() bool { return pos.sb.EntryBlock() } + +// Succs implements regalloc.Block. +func (pos *labelPosition) Succs() int { return pos.sb.Succs() } + +// LoopHeader implements regalloc.Block. +func (pos *labelPosition) LoopHeader() bool { return pos.sb.LoopHeader() } + +// LoopNestingForestChildren implements regalloc.Block. 
+func (pos *labelPosition) LoopNestingForestChildren() int { +	return len(pos.sb.LoopNestingForestChildren()) +} + +func (m *machine) insertMoveBefore(dst, src regalloc.VReg, instr *instruction) {  	typ := src.RegType()  	if typ != dst.RegType() {  		panic("BUG: src and dst must have the same type") @@ -26,8 +239,7 @@ func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) {  	linkInstr(cur, prevNext)  } -// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine. -func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { +func (m *machine) insertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {  	if !v.IsRealReg() {  		panic("BUG: VReg must be backed by real reg to be stored")  	} @@ -61,8 +273,7 @@ func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, aft  	return linkInstr(cur, prevNext)  } -// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine. -func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { +func (m *machine) insertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {  	if !v.IsRealReg() {  		panic("BUG: VReg must be backed by real reg to be stored")  	} @@ -98,13 +309,7 @@ func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, af  	return linkInstr(cur, prevNext)  } -// ClobberedRegisters implements backend.RegAllocFunctionMachine. -func (m *machine) ClobberedRegisters(regs []regalloc.VReg) { -	m.clobberedRegs = append(m.clobberedRegs[:0], regs...) -} - -// Swap implements backend.RegAllocFunctionMachine. -func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) { +func (m *machine) swap(cur *instruction, x1, x2, tmp regalloc.VReg) {  	if x1.RegType() == regalloc.RegTypeInt {  		prevNext := cur.next  		xc := m.allocateInstr().asXCHG(x1, newOperandReg(x2), 8) @@ -113,25 +318,24 @@ func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) {  	} else {  		if tmp.Valid() {  			prevNext := cur.next -			m.InsertMoveBefore(tmp, x1, prevNext) -			m.InsertMoveBefore(x1, x2, prevNext) -			m.InsertMoveBefore(x2, tmp, prevNext) +			m.insertMoveBefore(tmp, x1, prevNext) +			m.insertMoveBefore(x1, x2, prevNext) +			m.insertMoveBefore(x2, tmp, prevNext)  		} else {  			prevNext := cur.next  			r2 := x2.RealReg()  			// Temporarily spill x1 to stack. -			cur = m.InsertStoreRegisterAt(x1, cur, true).prev +			cur = m.insertStoreRegisterAt(x1, cur, true).prev  			// Then move x2 to x1.  			cur = linkInstr(cur, m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqa, newOperandReg(x2), x1))  			linkInstr(cur, prevNext)  			// Then reload the original value on x1 from stack to r2. -			m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true) +			m.insertReloadRegisterAt(x1.SetRealReg(r2), cur, true)  		}  	}  } -// LastInstrForInsertion implements backend.RegAllocFunctionMachine. -func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction { +func lastInstrForInsertion(begin, end *instruction) *instruction {  	cur := end  	for cur.kind == nop0 {  		cur = cur.prev @@ -146,8 +350,3 @@ func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction {  		return end  	}  } - -// SSABlockLabel implements backend.RegAllocFunctionMachine. 
-func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label { -	return m.ectx.SsaBlockIDToLabels[id] -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go index 539a8b754..8d514d857 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go @@ -127,7 +127,7 @@ func (m *machine) lowerSqmulRoundSat(x, y, ret ssa.Value) {  	tmpX := m.copyToTmp(xx.reg())  	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmulhrsw, yy, tmpX)) -	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmpX), tmp)) +	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqw, newOperandReg(tmpX), tmp))  	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmpX))  	m.copyTo(tmpX, m.c.VRegOf(ret)) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go index c6fcb8673..787975683 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go @@ -59,7 +59,7 @@ func (o *operand) format(_64 bool) string {  	case operandKindImm32:  		return fmt.Sprintf("$%d", int32(o.imm32()))  	case operandKindLabel: -		return backend.Label(o.imm32()).String() +		return label(o.imm32()).String()  	default:  		panic(fmt.Sprintf("BUG: invalid operand: %s", o.kind))  	} @@ -85,22 +85,22 @@ func (o *operand) imm32() uint32 {  	return uint32(o.data)  } -func (o *operand) label() backend.Label { +func (o *operand) label() label {  	switch o.kind {  	case operandKindLabel: -		return backend.Label(o.data) +		return label(o.data)  	case operandKindMem:  		mem := o.addressMode()  		if mem.kind() != amodeRipRel {  			panic("BUG: invalid label")  		} -		return backend.Label(mem.imm32) +		return label(mem.imm32)  	default:  		panic("BUG: invalid operand kind")  	}  } -func newOperandLabel(label backend.Label) operand { +func newOperandLabel(label label) operand {  	return operand{kind: operandKindLabel, data: uint64(label)}  } @@ -221,7 +221,7 @@ func (m *machine) newAmodeRegRegShift(imm32 uint32, base, index regalloc.VReg, s  	return ret  } -func (m *machine) newAmodeRipRel(label backend.Label) *amode { +func (m *machine) newAmodeRipRel(label label) *amode {  	ret := m.amodePool.Allocate()  	*ret = amode{kindWithShift: uint32(amodeRipRel), imm32: uint32(label)}  	return ret @@ -246,18 +246,18 @@ func (a *amode) String() string {  			"%d(%s,%s,%d)",  			int32(a.imm32), formatVRegSized(a.base, true), formatVRegSized(a.index, true), shift)  	case amodeRipRel: -		return fmt.Sprintf("%s(%%rip)", backend.Label(a.imm32)) +		return fmt.Sprintf("%s(%%rip)", label(a.imm32))  	default:  		panic("BUG: invalid amode kind")  	}  } -func (m *machine) getOperand_Mem_Reg(def *backend.SSAValueDefinition) (op operand) { -	if def.IsFromBlockParam() { -		return newOperandReg(def.BlkParamVReg) +func (m *machine) getOperand_Mem_Reg(def backend.SSAValueDefinition) (op operand) { +	if !def.IsFromInstr() { +		return newOperandReg(m.c.VRegOf(def.V))  	} -	if def.SSAValue().Type() == ssa.TypeV128 { +	if def.V.Type() == ssa.TypeV128 {  		// SIMD instructions require strict memory 
alignment, so we don't support the memory operand for V128 at the moment.  		return m.getOperand_Reg(def)  	} @@ -272,9 +272,9 @@ func (m *machine) getOperand_Mem_Reg(def *backend.SSAValueDefinition) (op operan  	return m.getOperand_Reg(def)  } -func (m *machine) getOperand_Mem_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) { -	if def.IsFromBlockParam() { -		return newOperandReg(def.BlkParamVReg) +func (m *machine) getOperand_Mem_Imm32_Reg(def backend.SSAValueDefinition) (op operand) { +	if !def.IsFromInstr() { +		return newOperandReg(m.c.VRegOf(def.V))  	}  	if m.c.MatchInstr(def, ssa.OpcodeLoad) { @@ -287,9 +287,9 @@ func (m *machine) getOperand_Mem_Imm32_Reg(def *backend.SSAValueDefinition) (op  	return m.getOperand_Imm32_Reg(def)  } -func (m *machine) getOperand_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) { -	if def.IsFromBlockParam() { -		return newOperandReg(def.BlkParamVReg) +func (m *machine) getOperand_Imm32_Reg(def backend.SSAValueDefinition) (op operand) { +	if !def.IsFromInstr() { +		return newOperandReg(m.c.VRegOf(def.V))  	}  	instr := def.Instr @@ -323,24 +323,14 @@ func asImm32(val uint64, allowSignExt bool) (uint32, bool) {  	return u32val, true  } -func (m *machine) getOperand_Reg(def *backend.SSAValueDefinition) (op operand) { +func (m *machine) getOperand_Reg(def backend.SSAValueDefinition) (op operand) {  	var v regalloc.VReg -	if def.IsFromBlockParam() { -		v = def.BlkParamVReg +	if instr := def.Instr; instr != nil && instr.Constant() { +		// We inline all the constant instructions so that we could reduce the register usage. +		v = m.lowerConstant(instr) +		instr.MarkLowered()  	} else { -		instr := def.Instr -		if instr.Constant() { -			// We inline all the constant instructions so that we could reduce the register usage. -			v = m.lowerConstant(instr) -			instr.MarkLowered() -		} else { -			if n := def.N; n == 0 { -				v = m.c.VRegOf(instr.Return()) -			} else { -				_, rs := instr.Returns() -				v = m.c.VRegOf(rs[n-1]) -			} -		} +		v = m.c.VRegOf(def.V)  	}  	return newOperandReg(v)  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go deleted file mode 100644 index 5219837e3..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build !tinygo - -package amd64 - -import "reflect" - -// setSliceLimits sets both Cap and Len for the given reflected slice. -func setSliceLimits(s *reflect.SliceHeader, limit uintptr) { -	s.Len = int(limit) -	s.Cap = int(limit) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go deleted file mode 100644 index df4cf46ec..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build tinygo - -package amd64 - -import "reflect" - -// setSliceLimits sets both Cap and Len for the given reflected slice. 
-func setSliceLimits(s *reflect.SliceHeader, limit uintptr) { -	s.Len = limit -	s.Len = limit -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go index 05ba5f027..ef823bdbd 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go @@ -9,12 +9,14 @@ import (  )  func stackView(rbp, top uintptr) []byte { +	l := int(top - rbp)  	var stackBuf []byte  	{ -		// TODO: use unsafe.Slice after floor version is set to Go 1.20. +		//nolint:staticcheck  		hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf))  		hdr.Data = rbp -		setSliceLimits(hdr, top-rbp) +		hdr.Len = l +		hdr.Cap = l  	}  	return stackBuf  } @@ -72,9 +74,9 @@ func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {  	//              |   SizeInBytes   |  	//              +-----------------+ <---- stackPointerBeforeGoCall  	//                 (low address) -	data := unsafe.Pointer(uintptr(unsafe.Pointer(stackPointerBeforeGoCall)) + 8) +	data := unsafe.Add(unsafe.Pointer(stackPointerBeforeGoCall), 8)  	size := *stackPointerBeforeGoCall / 8 -	return unsafe.Slice((*uint64)(data), int(size)) +	return unsafe.Slice((*uint64)(data), size)  }  func AdjustClonedStack(oldRsp, oldTop, rsp, rbp, top uintptr) { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go index 4eaa13ce1..d1eaa7cd4 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go @@ -182,9 +182,9 @@ func (m *machine) LowerReturns(rets []ssa.Value) {  // callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the  // caller side of the function call. -func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, slotBegin int64) { +func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def backend.SSAValueDefinition, slotBegin int64) {  	arg := &a.Args[argIndex] -	if def != nil && def.IsFromInstr() { +	if def.IsFromInstr() {  		// Constant instructions are inlined.  		
if inst := def.Instr; inst.Constant() {  			val := inst.Return() @@ -228,10 +228,9 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i  }  func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, *addressMode) { -	exct := m.executableContext -	exct.PendingInstructions = exct.PendingInstructions[:0] +	m.pendingInstructions = m.pendingInstructions[:0]  	mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse) -	for _, instr := range exct.PendingInstructions { +	for _, instr := range m.pendingInstructions {  		cur = linkInstr(cur, instr)  	}  	return cur, mode diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go index 99e6bb482..06f8a4a05 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go @@ -14,7 +14,6 @@ var calleeSavedRegistersSorted = []regalloc.VReg{  // CompileGoFunctionTrampoline implements backend.Machine.  func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte { -	exct := m.executableContext  	argBegin := 1 // Skips exec context by default.  	if needModuleContextPtr {  		argBegin++ @@ -26,7 +25,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *  	cur := m.allocateInstr()  	cur.asNop0() -	exct.RootInstr = cur +	m.rootInstr = cur  	// Execution context is always the first argument.  	execCtrPtr := x0VReg @@ -244,7 +243,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *  	ret.asRet()  	linkInstr(cur, ret) -	m.encode(m.executableContext.RootInstr) +	m.encode(m.rootInstr)  	return m.compiler.Buf()  } @@ -302,20 +301,18 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re  }  func (m *machine) lowerConstantI64AndInsert(cur *instruction, dst regalloc.VReg, v int64) *instruction { -	exct := m.executableContext -	exct.PendingInstructions = exct.PendingInstructions[:0] +	m.pendingInstructions = m.pendingInstructions[:0]  	m.lowerConstantI64(dst, v) -	for _, instr := range exct.PendingInstructions { +	for _, instr := range m.pendingInstructions {  		cur = linkInstr(cur, instr)  	}  	return cur  }  func (m *machine) lowerConstantI32AndInsert(cur *instruction, dst regalloc.VReg, v int32) *instruction { -	exct := m.executableContext -	exct.PendingInstructions = exct.PendingInstructions[:0] +	m.pendingInstructions = m.pendingInstructions[:0]  	m.lowerConstantI32(dst, v) -	for _, instr := range exct.PendingInstructions { +	for _, instr := range m.pendingInstructions {  		cur = linkInstr(cur, instr)  	}  	return cur diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go index 7121cb538..1f563428a 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -36,18 +36,6 @@ type (  	instructionKind byte  ) -func asNop0(i *instruction) { -	i.kind = nop0 -} - -func setNext(i, next *instruction) { -	i.next = next -} - -func 
setPrev(i, prev *instruction) { -	i.prev = prev -} -  // IsCall implements regalloc.Instr IsCall.  func (i *instruction) IsCall() bool {  	return i.kind == call @@ -63,21 +51,6 @@ func (i *instruction) IsReturn() bool {  	return i.kind == ret  } -// Next implements regalloc.Instr Next. -func (i *instruction) Next() regalloc.Instr { -	return i.next -} - -// Prev implements regalloc.Instr Prev. -func (i *instruction) Prev() regalloc.Instr { -	return i.prev -} - -// AddedBeforeRegAlloc implements regalloc.Instr AddedBeforeRegAlloc. -func (i *instruction) AddedBeforeRegAlloc() bool { -	return i.addedBeforeRegAlloc -} -  type defKind byte  const ( diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go index f0ede2d6a..21be9b71e 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go @@ -12,7 +12,7 @@ import (  // Encode implements backend.Machine Encode.  func (m *machine) Encode(ctx context.Context) error {  	m.resolveRelativeAddresses(ctx) -	m.encode(m.executableContext.RootInstr) +	m.encode(m.rootInstr)  	if l := len(m.compiler.Buf()); l > maxFunctionExecutableSize {  		return fmt.Errorf("function size exceeds the limit: %d > %d", l, maxFunctionExecutableSize)  	} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index 048bf3204..f9df356c0 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -17,19 +17,18 @@ import (  // LowerSingleBranch implements backend.Machine.  func (m *machine) LowerSingleBranch(br *ssa.Instruction) { -	ectx := m.executableContext  	switch br.Opcode() {  	case ssa.OpcodeJump: -		_, _, targetBlk := br.BranchData() +		_, _, targetBlkID := br.BranchData()  		if br.IsFallthroughJump() {  			return  		}  		b := m.allocateInstr() -		target := ectx.GetOrAllocateSSABlockLabel(targetBlk) -		if target == labelReturn { +		targetBlk := m.compiler.SSABuilder().BasicBlock(targetBlkID) +		if targetBlk.ReturnBlock() {  			b.asRet()  		} else { -			b.asBr(target) +			b.asBr(ssaBlockLabel(targetBlk))  		}  		m.insert(b)  	case ssa.OpcodeBrTable: @@ -40,7 +39,8 @@ func (m *machine) LowerSingleBranch(br *ssa.Instruction) {  }  func (m *machine) lowerBrTable(i *ssa.Instruction) { -	index, targets := i.BrTableData() +	index, targetBlockIDs := i.BrTableData() +	targetBlockCount := len(targetBlockIDs.View())  	indexOperand := m.getOperand_NR(m.compiler.ValueDefinition(index), extModeNone)  	// Firstly, we have to do the bounds check of the index, and @@ -50,7 +50,7 @@ func (m *machine) lowerBrTable(i *ssa.Instruction) {  	// subs wzr, index, maxIndexReg  	// csel adjustedIndex, maxIndexReg, index, hs ;; if index is higher or equal than maxIndexReg.  	
maxIndexReg := m.compiler.AllocateVReg(ssa.TypeI32) -	m.lowerConstantI32(maxIndexReg, int32(len(targets)-1)) +	m.lowerConstantI32(maxIndexReg, int32(targetBlockCount-1))  	subs := m.allocateInstr()  	subs.asALU(aluOpSubS, xzrVReg, indexOperand, operandNR(maxIndexReg), false)  	m.insert(subs) @@ -61,24 +61,24 @@ func (m *machine) lowerBrTable(i *ssa.Instruction) {  	brSequence := m.allocateInstr() -	tableIndex := m.addJmpTableTarget(targets) -	brSequence.asBrTableSequence(adjustedIndex, tableIndex, len(targets)) +	tableIndex := m.addJmpTableTarget(targetBlockIDs) +	brSequence.asBrTableSequence(adjustedIndex, tableIndex, targetBlockCount)  	m.insert(brSequence)  }  // LowerConditionalBranch implements backend.Machine.  func (m *machine) LowerConditionalBranch(b *ssa.Instruction) { -	exctx := m.executableContext -	cval, args, targetBlk := b.BranchData() +	cval, args, targetBlkID := b.BranchData()  	if len(args) > 0 {  		panic(fmt.Sprintf(  			"conditional branch shouldn't have args; likely a bug in critical edge splitting: from %s to %s", -			exctx.CurrentSSABlk, -			targetBlk, +			m.currentLabelPos.sb, +			targetBlkID,  		))  	} -	target := exctx.GetOrAllocateSSABlockLabel(targetBlk) +	targetBlk := m.compiler.SSABuilder().BasicBlock(targetBlkID) +	target := ssaBlockLabel(targetBlk)  	cvalDef := m.compiler.ValueDefinition(cval)  	switch { @@ -791,7 +791,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  	default:  		panic("TODO: lowering " + op.String())  	} -	m.executableContext.FlushPendingInstructions() +	m.FlushPendingInstructions()  }  func (m *machine) lowerShuffle(rd regalloc.VReg, rn, rm operand, lane1, lane2 uint64) { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go index d9fbf1789..7a398c3d0 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go @@ -162,9 +162,9 @@ func (o operand) assignReg(v regalloc.VReg) operand {  //  // `mode` is used to extend the operand if the bit length is smaller than mode.bits().  // If the operand can be expressed as operandKindImm12, `mode` is ignored. -func (m *machine) getOperand_Imm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { -	if def.IsFromBlockParam() { -		return operandNR(def.BlkParamVReg) +func (m *machine) getOperand_Imm12_ER_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { +	if !def.IsFromInstr() { +		return operandNR(m.compiler.VRegOf(def.V))  	}  	instr := def.Instr @@ -179,9 +179,9 @@ func (m *machine) getOperand_Imm12_ER_SR_NR(def *backend.SSAValueDefinition, mod  // getOperand_MaybeNegatedImm12_ER_SR_NR is almost the same as getOperand_Imm12_ER_SR_NR, but this might negate the immediate value.  // If the immediate value is negated, the second return value is true, otherwise always false. 
-func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand, negatedImm12 bool) { -	if def.IsFromBlockParam() { -		return operandNR(def.BlkParamVReg), false +func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand, negatedImm12 bool) { +	if !def.IsFromInstr() { +		return operandNR(m.compiler.VRegOf(def.V)), false  	}  	instr := def.Instr @@ -193,7 +193,7 @@ func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDef  		}  		signExtended := int64(c) -		if def.SSAValue().Type().Bits() == 32 { +		if def.V.Type().Bits() == 32 {  			signExtended = (signExtended << 32) >> 32  		}  		negatedWithoutSign := -signExtended @@ -208,9 +208,9 @@ func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDef  // ensureValueNR returns an operand of either operandKindER, operandKindSR, or operandKindNR from the given value (defined by `def).  //  // `mode` is used to extend the operand if the bit length is smaller than mode.bits(). -func (m *machine) getOperand_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { -	if def.IsFromBlockParam() { -		return operandNR(def.BlkParamVReg) +func (m *machine) getOperand_ER_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { +	if !def.IsFromInstr() { +		return operandNR(m.compiler.VRegOf(def.V))  	}  	if m.compiler.MatchInstr(def, ssa.OpcodeSExtend) || m.compiler.MatchInstr(def, ssa.OpcodeUExtend) { @@ -251,9 +251,9 @@ func (m *machine) getOperand_ER_SR_NR(def *backend.SSAValueDefinition, mode extM  // ensureValueNR returns an operand of either operandKindSR or operandKindNR from the given value (defined by `def).  //  // `mode` is used to extend the operand if the bit length is smaller than mode.bits(). -func (m *machine) getOperand_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { -	if def.IsFromBlockParam() { -		return operandNR(def.BlkParamVReg) +func (m *machine) getOperand_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { +	if !def.IsFromInstr() { +		return operandNR(m.compiler.VRegOf(def.V))  	}  	if m.compiler.MatchInstr(def, ssa.OpcodeIshl) { @@ -273,9 +273,9 @@ func (m *machine) getOperand_SR_NR(def *backend.SSAValueDefinition, mode extMode  }  // getOperand_ShiftImm_NR returns an operand of either operandKindShiftImm or operandKindNR from the given value (defined by `def). -func (m *machine) getOperand_ShiftImm_NR(def *backend.SSAValueDefinition, mode extMode, shiftBitWidth byte) (op operand) { -	if def.IsFromBlockParam() { -		return operandNR(def.BlkParamVReg) +func (m *machine) getOperand_ShiftImm_NR(def backend.SSAValueDefinition, mode extMode, shiftBitWidth byte) (op operand) { +	if !def.IsFromInstr() { +		return operandNR(m.compiler.VRegOf(def.V))  	}  	instr := def.Instr @@ -289,28 +289,18 @@ func (m *machine) getOperand_ShiftImm_NR(def *backend.SSAValueDefinition, mode e  // ensureValueNR returns an operand of operandKindNR from the given value (defined by `def).  //  // `mode` is used to extend the operand if the bit length is smaller than mode.bits(). 
-func (m *machine) getOperand_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { +func (m *machine) getOperand_NR(def backend.SSAValueDefinition, mode extMode) (op operand) {  	var v regalloc.VReg -	if def.IsFromBlockParam() { -		v = def.BlkParamVReg +	if def.IsFromInstr() && def.Instr.Constant() { +		// We inline all the constant instructions so that we could reduce the register usage. +		v = m.lowerConstant(def.Instr) +		def.Instr.MarkLowered()  	} else { -		instr := def.Instr -		if instr.Constant() { -			// We inline all the constant instructions so that we could reduce the register usage. -			v = m.lowerConstant(instr) -			instr.MarkLowered() -		} else { -			if n := def.N; n == 0 { -				v = m.compiler.VRegOf(instr.Return()) -			} else { -				_, rs := instr.Returns() -				v = m.compiler.VRegOf(rs[n-1]) -			} -		} +		v = m.compiler.VRegOf(def.V)  	}  	r := v -	switch inBits := def.SSAValue().Type().Bits(); { +	switch inBits := def.V.Type().Bits(); {  	case mode == extModeNone:  	case inBits == 32 && (mode == extModeZeroExtend32 || mode == extModeSignExtend32):  	case inBits == 32 && mode == extModeZeroExtend64: diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go index 5f584f928..00e6b238f 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go @@ -3,6 +3,7 @@ package arm64  import (  	"context"  	"fmt" +	"math"  	"strings"  	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend" @@ -14,12 +15,33 @@ import (  type (  	// machine implements backend.Machine.  	machine struct { -		compiler          backend.Compiler -		executableContext *backend.ExecutableContextT[instruction] -		currentABI        *backend.FunctionABI - -		regAlloc   regalloc.Allocator -		regAllocFn *backend.RegAllocFunction[*instruction, *machine] +		compiler   backend.Compiler +		currentABI *backend.FunctionABI +		instrPool  wazevoapi.Pool[instruction] +		// labelPositionPool is the pool of labelPosition. The id is the label where +		// if the label is less than the maxSSABlockID, it's the ssa.BasicBlockID. +		labelPositionPool wazevoapi.IDedPool[labelPosition] + +		// nextLabel is the next label to be allocated. The first free label comes after maxSSABlockID +		// so that we can have an identical label for the SSA block ID, which is useful for debugging. +		nextLabel label +		// rootInstr is the first instruction of the function. +		rootInstr *instruction +		// currentLabelPos is the currently-compiled ssa.BasicBlock's labelPosition. +		currentLabelPos *labelPosition +		// orderedSSABlockLabelPos is the ordered list of labelPosition in the generated code for each ssa.BasicBlock. +		orderedSSABlockLabelPos []*labelPosition +		// returnLabelPos is the labelPosition for the return block. +		returnLabelPos labelPosition +		// perBlockHead and perBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock. +		perBlockHead, perBlockEnd *instruction +		// pendingInstructions are the instructions which are not yet emitted into the instruction list. +		pendingInstructions []*instruction +		// maxSSABlockID is the maximum ssa.BasicBlockID in the current function. 
+		maxSSABlockID label + +		regAlloc   regalloc.Allocator[*instruction, *labelPosition, *regAllocFn] +		regAllocFn regAllocFn  		amodePool wazevoapi.Pool[addressMode] @@ -35,6 +57,8 @@ type (  		// jmpTableTargets holds the labels of the jump table targets.  		jmpTableTargets [][]uint32 +		// jmpTableTargetNext is the index to the jmpTableTargets slice to be used for the next jump table. +		jmpTableTargetsNext int  		// spillSlotSize is the size of the stack slot in bytes used for spilling registers.  		// During the execution of the function, the stack looks like: @@ -91,45 +115,132 @@ type (  		nextLabel label  		offset    int64  	} +) -	labelPosition = backend.LabelPosition[instruction] -	label         = backend.Label +type ( +	// label represents a position in the generated code which is either +	// a real instruction or the constant InstructionPool (e.g. jump tables). +	// +	// This is exactly the same as the traditional "label" in assembly code. +	label uint32 + +	// labelPosition represents the regions of the generated code which the label represents. +	// This implements regalloc.Block. +	labelPosition struct { +		// sb is not nil if this corresponds to a ssa.BasicBlock. +		sb ssa.BasicBlock +		// cur is used to walk through the instructions in the block during the register allocation. +		cur, +		// begin and end are the first and last instructions of the block. +		begin, end *instruction +		// binaryOffset is the offset in the binary where the label is located. +		binaryOffset int64 +	}  )  const ( -	labelReturn  = backend.LabelReturn -	labelInvalid = backend.LabelInvalid +	labelReturn  label = math.MaxUint32 +	labelInvalid       = labelReturn - 1  ) +// String implements backend.Machine. +func (l label) String() string { +	return fmt.Sprintf("L%d", l) +} + +func resetLabelPosition(l *labelPosition) { +	*l = labelPosition{} +} +  // NewBackend returns a new backend for arm64.  func NewBackend() backend.Machine {  	m := &machine{  		spillSlots:        make(map[regalloc.VRegID]int64), -		executableContext: newExecutableContext(), -		regAlloc:          regalloc.NewAllocator(regInfo), +		regAlloc:          regalloc.NewAllocator[*instruction, *labelPosition, *regAllocFn](regInfo),  		amodePool:         wazevoapi.NewPool[addressMode](resetAddressMode), +		instrPool:         wazevoapi.NewPool[instruction](resetInstruction), +		labelPositionPool: wazevoapi.NewIDedPool[labelPosition](resetLabelPosition),  	} +	m.regAllocFn.m = m  	return m  } -func newExecutableContext() *backend.ExecutableContextT[instruction] { -	return backend.NewExecutableContextT[instruction](resetInstruction, setNext, setPrev, asNop0) +func ssaBlockLabel(sb ssa.BasicBlock) label { +	if sb.ReturnBlock() { +		return labelReturn +	} +	return label(sb.ID())  } -// ExecutableContext implements backend.Machine. -func (m *machine) ExecutableContext() backend.ExecutableContext { -	return m.executableContext +// getOrAllocateSSABlockLabelPosition returns the labelPosition for the given basic block. +func (m *machine) getOrAllocateSSABlockLabelPosition(sb ssa.BasicBlock) *labelPosition { +	if sb.ReturnBlock() { +		m.returnLabelPos.sb = sb +		return &m.returnLabelPos +	} + +	l := ssaBlockLabel(sb) +	pos := m.labelPositionPool.GetOrAllocate(int(l)) +	pos.sb = sb +	return pos  } -// RegAlloc implements backend.Machine Function. 
-func (m *machine) RegAlloc() { -	rf := m.regAllocFn -	for _, pos := range m.executableContext.OrderedBlockLabels { -		rf.AddBlock(pos.SB, pos.L, pos.Begin, pos.End) +// LinkAdjacentBlocks implements backend.Machine. +func (m *machine) LinkAdjacentBlocks(prev, next ssa.BasicBlock) { +	prevPos, nextPos := m.getOrAllocateSSABlockLabelPosition(prev), m.getOrAllocateSSABlockLabelPosition(next) +	prevPos.end.next = nextPos.begin +} + +// StartBlock implements backend.Machine. +func (m *machine) StartBlock(blk ssa.BasicBlock) { +	m.currentLabelPos = m.getOrAllocateSSABlockLabelPosition(blk) +	labelPos := m.currentLabelPos +	end := m.allocateNop() +	m.perBlockHead, m.perBlockEnd = end, end +	labelPos.begin, labelPos.end = end, end +	m.orderedSSABlockLabelPos = append(m.orderedSSABlockLabelPos, labelPos) +} + +// EndBlock implements ExecutableContext. +func (m *machine) EndBlock() { +	// Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions. +	m.insertAtPerBlockHead(m.allocateNop()) + +	m.currentLabelPos.begin = m.perBlockHead + +	if m.currentLabelPos.sb.EntryBlock() { +		m.rootInstr = m.perBlockHead +	} +} + +func (m *machine) insertAtPerBlockHead(i *instruction) { +	if m.perBlockHead == nil { +		m.perBlockHead = i +		m.perBlockEnd = i +		return  	} +	i.next = m.perBlockHead +	m.perBlockHead.prev = i +	m.perBlockHead = i +} + +// FlushPendingInstructions implements backend.Machine. +func (m *machine) FlushPendingInstructions() { +	l := len(m.pendingInstructions) +	if l == 0 { +		return +	} +	for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order. +		m.insertAtPerBlockHead(m.pendingInstructions[i]) +	} +	m.pendingInstructions = m.pendingInstructions[:0] +} + +// RegAlloc implements backend.Machine Function. +func (m *machine) RegAlloc() {  	m.regAllocStarted = true -	m.regAlloc.DoAllocation(rf) +	m.regAlloc.DoAllocation(&m.regAllocFn)  	// Now that we know the final spill slot size, we must align spillSlotSize to 16 bytes.  	m.spillSlotSize = (m.spillSlotSize + 15) &^ 15  } @@ -146,13 +257,22 @@ func (m *machine) Reset() {  	m.clobberedRegs = m.clobberedRegs[:0]  	m.regAllocStarted = false  	m.regAlloc.Reset() -	m.regAllocFn.Reset()  	m.spillSlotSize = 0  	m.unresolvedAddressModes = m.unresolvedAddressModes[:0]  	m.maxRequiredStackSizeForCalls = 0 -	m.executableContext.Reset() -	m.jmpTableTargets = m.jmpTableTargets[:0] +	m.jmpTableTargetsNext = 0  	m.amodePool.Reset() +	m.instrPool.Reset() +	m.labelPositionPool.Reset() +	m.pendingInstructions = m.pendingInstructions[:0] +	m.perBlockHead, m.perBlockEnd, m.rootInstr = nil, nil, nil +	m.orderedSSABlockLabelPos = m.orderedSSABlockLabelPos[:0] +} + +// StartLoweringFunction implements backend.Machine StartLoweringFunction. +func (m *machine) StartLoweringFunction(maxBlockID ssa.BasicBlockID) { +	m.maxSSABlockID = label(maxBlockID) +	m.nextLabel = label(maxBlockID) + 1  }  // SetCurrentABI implements backend.Machine SetCurrentABI. @@ -168,12 +288,11 @@ func (m *machine) DisableStackCheck() {  // SetCompiler implements backend.Machine.  
func (m *machine) SetCompiler(ctx backend.Compiler) {  	m.compiler = ctx -	m.regAllocFn = backend.NewRegAllocFunction[*instruction, *machine](m, ctx.SSABuilder(), ctx) +	m.regAllocFn.ssaB = ctx.SSABuilder()  }  func (m *machine) insert(i *instruction) { -	ectx := m.executableContext -	ectx.PendingInstructions = append(ectx.PendingInstructions, i) +	m.pendingInstructions = append(m.pendingInstructions, i)  }  func (m *machine) insertBrTargetLabel() label { @@ -183,18 +302,18 @@ func (m *machine) insertBrTargetLabel() label {  }  func (m *machine) allocateBrTarget() (nop *instruction, l label) { -	ectx := m.executableContext -	l = ectx.AllocateLabel() +	l = m.nextLabel +	m.nextLabel++  	nop = m.allocateInstr()  	nop.asNop0WithLabel(l) -	pos := ectx.GetOrAllocateLabelPosition(l) -	pos.Begin, pos.End = nop, nop +	pos := m.labelPositionPool.GetOrAllocate(int(l)) +	pos.begin, pos.end = nop, nop  	return  }  // allocateInstr allocates an instruction.  func (m *machine) allocateInstr() *instruction { -	instr := m.executableContext.InstructionPool.Allocate() +	instr := m.instrPool.Allocate()  	if !m.regAllocStarted {  		instr.addedBeforeRegAlloc = true  	} @@ -251,7 +370,6 @@ func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruc  // resolveRelativeAddresses resolves the relative addresses before encoding.  func (m *machine) resolveRelativeAddresses(ctx context.Context) { -	ectx := m.executableContext  	for {  		if len(m.unresolvedAddressModes) > 0 {  			arg0offset, ret0offset := m.arg0OffsetFromSP(), m.ret0OffsetFromSP() @@ -265,35 +383,36 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {  		var fn string  		var fnIndex int -		var labelToSSABlockID map[label]ssa.BasicBlockID +		var labelPosToLabel map[*labelPosition]label  		if wazevoapi.PerfMapEnabled { -			fn = wazevoapi.GetCurrentFunctionName(ctx) -			labelToSSABlockID = make(map[label]ssa.BasicBlockID) -			for i, l := range ectx.SsaBlockIDToLabels { -				labelToSSABlockID[l] = ssa.BasicBlockID(i) +			labelPosToLabel = make(map[*labelPosition]label) +			for i := 0; i <= m.labelPositionPool.MaxIDEncountered(); i++ { +				labelPosToLabel[m.labelPositionPool.Get(i)] = label(i)  			} + +			fn = wazevoapi.GetCurrentFunctionName(ctx)  			fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx)  		}  		// Next, in order to determine the offsets of relative jumps, we have to calculate the size of each label.  		var offset int64 -		for i, pos := range ectx.OrderedBlockLabels { -			pos.BinaryOffset = offset +		for i, pos := range m.orderedSSABlockLabelPos { +			pos.binaryOffset = offset  			var size int64 -			for cur := pos.Begin; ; cur = cur.next { +			for cur := pos.begin; ; cur = cur.next {  				switch cur.kind {  				case nop0:  					l := cur.nop0Label() -					if pos := ectx.LabelPositions[l]; pos != nil { -						pos.BinaryOffset = offset + size +					if pos := m.labelPositionPool.Get(int(l)); pos != nil { +						pos.binaryOffset = offset + size  					}  				case condBr:  					if !cur.condBrOffsetResolved() {  						var nextLabel label -						if i < len(ectx.OrderedBlockLabels)-1 { +						if i < len(m.orderedSSABlockLabelPos)-1 {  							// Note: this is only used when the block ends with fallthrough,  							// therefore can be safely assumed that the next block exists when it's needed. 
-							nextLabel = ectx.OrderedBlockLabels[i+1].L +							nextLabel = ssaBlockLabel(m.orderedSSABlockLabelPos[i+1].sb)  						}  						m.condBrRelocs = append(m.condBrRelocs, condBrReloc{  							cbr: cur, currentLabelPos: pos, offset: offset + size, @@ -302,21 +421,14 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {  					}  				}  				size += cur.size() -				if cur == pos.End { +				if cur == pos.end {  					break  				}  			}  			if wazevoapi.PerfMapEnabled {  				if size > 0 { -					l := pos.L -					var labelStr string -					if blkID, ok := labelToSSABlockID[l]; ok { -						labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID) -					} else { -						labelStr = l.String() -					} -					wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr)) +					wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelPosToLabel[pos]))  				}  			}  			offset += size @@ -330,7 +442,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {  			offset := reloc.offset  			target := cbr.condBrLabel() -			offsetOfTarget := ectx.LabelPositions[target].BinaryOffset +			offsetOfTarget := m.labelPositionPool.Get(int(target)).binaryOffset  			diff := offsetOfTarget - offset  			if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {  				// This case the conditional branch is too huge. We place the trampoline instructions at the end of the current block, @@ -351,11 +463,11 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {  	}  	var currentOffset int64 -	for cur := ectx.RootInstr; cur != nil; cur = cur.next { +	for cur := m.rootInstr; cur != nil; cur = cur.next {  		switch cur.kind {  		case br:  			target := cur.brLabel() -			offsetOfTarget := ectx.LabelPositions[target].BinaryOffset +			offsetOfTarget := m.labelPositionPool.Get(int(target)).binaryOffset  			diff := offsetOfTarget - currentOffset  			divided := diff >> 2  			if divided < minSignedInt26 || divided > maxSignedInt26 { @@ -366,7 +478,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {  		case condBr:  			if !cur.condBrOffsetResolved() {  				target := cur.condBrLabel() -				offsetOfTarget := ectx.LabelPositions[target].BinaryOffset +				offsetOfTarget := m.labelPositionPool.Get(int(target)).binaryOffset  				diff := offsetOfTarget - currentOffset  				if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {  					panic("BUG: branch relocation for large conditional branch larger than 19-bit range must be handled properly") @@ -378,7 +490,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {  			targets := m.jmpTableTargets[tableIndex]  			for i := range targets {  				l := label(targets[i]) -				offsetOfTarget := ectx.LabelPositions[l].BinaryOffset +				offsetOfTarget := m.labelPositionPool.Get(int(l)).binaryOffset  				diff := offsetOfTarget - (currentOffset + brTableSequenceOffsetTableBegin)  				targets[i] = uint32(diff)  			} @@ -399,7 +511,7 @@ const (  )  func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *labelPosition, nextLabel label) { -	cur := currentBlk.End +	cur := currentBlk.end  	originalTarget := cbr.condBrLabel()  	endNext := cur.next @@ -422,32 +534,27 @@ func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *  	cur = linkInstr(cur, br)  	// Update the end of the current block. 
-	currentBlk.End = cur +	currentBlk.end = cur  	linkInstr(cur, endNext)  }  // Format implements backend.Machine.  func (m *machine) Format() string { -	ectx := m.executableContext  	begins := map[*instruction]label{} -	for _, pos := range ectx.LabelPositions { +	for l := label(0); l < m.nextLabel; l++ { +		pos := m.labelPositionPool.Get(int(l))  		if pos != nil { -			begins[pos.Begin] = pos.L +			begins[pos.begin] = l  		}  	} -	irBlocks := map[label]ssa.BasicBlockID{} -	for i, l := range ectx.SsaBlockIDToLabels { -		irBlocks[l] = ssa.BasicBlockID(i) -	} -  	var lines []string -	for cur := ectx.RootInstr; cur != nil; cur = cur.next { +	for cur := m.rootInstr; cur != nil; cur = cur.next {  		if l, ok := begins[cur]; ok {  			var labelStr string -			if blkID, ok := irBlocks[l]; ok { -				labelStr = fmt.Sprintf("%s (SSA Block: %s):", l, blkID) +			if l <= m.maxSSABlockID { +				labelStr = fmt.Sprintf("%s (SSA Block: blk%d):", l, int(l))  			} else {  				labelStr = fmt.Sprintf("%s:", l)  			} @@ -508,13 +615,17 @@ func (m *machine) frameSize() int64 {  	return s  } -func (m *machine) addJmpTableTarget(targets []ssa.BasicBlock) (index int) { -	// TODO: reuse the slice! -	labels := make([]uint32, len(targets)) -	for j, target := range targets { -		labels[j] = uint32(m.executableContext.GetOrAllocateSSABlockLabel(target)) +func (m *machine) addJmpTableTarget(targets ssa.Values) (index int) { +	if m.jmpTableTargetsNext == len(m.jmpTableTargets) { +		m.jmpTableTargets = append(m.jmpTableTargets, make([]uint32, 0, len(targets.View()))) +	} + +	index = m.jmpTableTargetsNext +	m.jmpTableTargetsNext++ +	m.jmpTableTargets[index] = m.jmpTableTargets[index][:0] +	for _, targetBlockID := range targets.View() { +		target := m.compiler.SSABuilder().BasicBlock(ssa.BasicBlockID(targetBlockID)) +		m.jmpTableTargets[index] = append(m.jmpTableTargets[index], uint32(target.ID()))  	} -	index = len(m.jmpTableTargets) -	m.jmpTableTargets = append(m.jmpTableTargets, labels)  	return  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go index d9032f921..c646a8fab 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go @@ -15,9 +15,7 @@ func (m *machine) PostRegAlloc() {  // setupPrologue initializes the prologue of the function.  func (m *machine) setupPrologue() { -	ectx := m.executableContext - -	cur := ectx.RootInstr +	cur := m.rootInstr  	prevInitInst := cur.next  	// @@ -196,21 +194,20 @@ func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction {  // 1. Removes the redundant copy instruction.  // 2. Inserts the epilogue.  
func (m *machine) postRegAlloc() { -	ectx := m.executableContext -	for cur := ectx.RootInstr; cur != nil; cur = cur.next { +	for cur := m.rootInstr; cur != nil; cur = cur.next {  		switch cur.kind {  		case ret:  			m.setupEpilogueAfter(cur.prev)  		case loadConstBlockArg:  			lc := cur  			next := lc.next -			m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0] +			m.pendingInstructions = m.pendingInstructions[:0]  			m.lowerLoadConstantBlockArgAfterRegAlloc(lc) -			for _, instr := range m.executableContext.PendingInstructions { +			for _, instr := range m.pendingInstructions {  				cur = linkInstr(cur, instr)  			}  			linkInstr(cur, next) -			m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0] +			m.pendingInstructions = m.pendingInstructions[:0]  		default:  			// Removes the redundant copy instruction.  			if cur.IsCopy() && cur.rn.realReg() == cur.rd.RealReg() { @@ -432,11 +429,9 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi  // CompileStackGrowCallSequence implements backend.Machine.  func (m *machine) CompileStackGrowCallSequence() []byte { -	ectx := m.executableContext -  	cur := m.allocateInstr()  	cur.asNop0() -	ectx.RootInstr = cur +	m.rootInstr = cur  	// Save the callee saved and argument registers.  	cur = m.saveRegistersInExecutionContext(cur, saveRequiredRegs) @@ -458,16 +453,14 @@ func (m *machine) CompileStackGrowCallSequence() []byte {  	ret.asRet()  	linkInstr(cur, ret) -	m.encode(ectx.RootInstr) +	m.encode(m.rootInstr)  	return m.compiler.Buf()  }  func (m *machine) addsAddOrSubStackPointer(cur *instruction, rd regalloc.VReg, diff int64, add bool) *instruction { -	ectx := m.executableContext - -	ectx.PendingInstructions = ectx.PendingInstructions[:0] +	m.pendingInstructions = m.pendingInstructions[:0]  	m.insertAddOrSubStackPointer(rd, diff, add) -	for _, inserted := range ectx.PendingInstructions { +	for _, inserted := range m.pendingInstructions {  		cur = linkInstr(cur, inserted)  	}  	return cur diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go index c7eb92cc2..f2ed53ae5 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go @@ -3,18 +3,226 @@ package arm64  // This file implements the interfaces required for register allocations. See backend.RegAllocFunctionMachine.  import ( -	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"  	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"  	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"  ) -// ClobberedRegisters implements backend.RegAllocFunctionMachine. -func (m *machine) ClobberedRegisters(regs []regalloc.VReg) { -	m.clobberedRegs = append(m.clobberedRegs[:0], regs...) +// regAllocFn implements regalloc.Function. +type regAllocFn struct { +	ssaB                   ssa.Builder +	m                      *machine +	loopNestingForestRoots []ssa.BasicBlock +	blockIter              int  } -// Swap implements backend.RegAllocFunctionMachine. -func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) { +// PostOrderBlockIteratorBegin implements regalloc.Function. 
+func (f *regAllocFn) PostOrderBlockIteratorBegin() *labelPosition { +	f.blockIter = len(f.m.orderedSSABlockLabelPos) - 1 +	return f.PostOrderBlockIteratorNext() +} + +// PostOrderBlockIteratorNext implements regalloc.Function. +func (f *regAllocFn) PostOrderBlockIteratorNext() *labelPosition { +	if f.blockIter < 0 { +		return nil +	} +	b := f.m.orderedSSABlockLabelPos[f.blockIter] +	f.blockIter-- +	return b +} + +// ReversePostOrderBlockIteratorBegin implements regalloc.Function. +func (f *regAllocFn) ReversePostOrderBlockIteratorBegin() *labelPosition { +	f.blockIter = 0 +	return f.ReversePostOrderBlockIteratorNext() +} + +// ReversePostOrderBlockIteratorNext implements regalloc.Function. +func (f *regAllocFn) ReversePostOrderBlockIteratorNext() *labelPosition { +	if f.blockIter >= len(f.m.orderedSSABlockLabelPos) { +		return nil +	} +	b := f.m.orderedSSABlockLabelPos[f.blockIter] +	f.blockIter++ +	return b +} + +// ClobberedRegisters implements regalloc.Function. +func (f *regAllocFn) ClobberedRegisters(regs []regalloc.VReg) { +	f.m.clobberedRegs = append(f.m.clobberedRegs[:0], regs...) +} + +// LoopNestingForestRoots implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestRoots() int { +	f.loopNestingForestRoots = f.ssaB.LoopNestingForestRoots() +	return len(f.loopNestingForestRoots) +} + +// LoopNestingForestRoot implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestRoot(i int) *labelPosition { +	root := f.loopNestingForestRoots[i] +	pos := f.m.getOrAllocateSSABlockLabelPosition(root) +	return pos +} + +// LowestCommonAncestor implements regalloc.Function. +func (f *regAllocFn) LowestCommonAncestor(blk1, blk2 *labelPosition) *labelPosition { +	sb := f.ssaB.LowestCommonAncestor(blk1.sb, blk2.sb) +	pos := f.m.getOrAllocateSSABlockLabelPosition(sb) +	return pos +} + +// Idom implements regalloc.Function. +func (f *regAllocFn) Idom(blk *labelPosition) *labelPosition { +	sb := f.ssaB.Idom(blk.sb) +	pos := f.m.getOrAllocateSSABlockLabelPosition(sb) +	return pos +} + +// SwapBefore implements regalloc.Function. +func (f *regAllocFn) SwapBefore(x1, x2, tmp regalloc.VReg, instr *instruction) { +	f.m.swap(instr.prev, x1, x2, tmp) +} + +// StoreRegisterBefore implements regalloc.Function. +func (f *regAllocFn) StoreRegisterBefore(v regalloc.VReg, instr *instruction) { +	m := f.m +	m.insertStoreRegisterAt(v, instr, false) +} + +// StoreRegisterAfter implements regalloc.Function. +func (f *regAllocFn) StoreRegisterAfter(v regalloc.VReg, instr *instruction) { +	m := f.m +	m.insertStoreRegisterAt(v, instr, true) +} + +// ReloadRegisterBefore implements regalloc.Function. +func (f *regAllocFn) ReloadRegisterBefore(v regalloc.VReg, instr *instruction) { +	m := f.m +	m.insertReloadRegisterAt(v, instr, false) +} + +// ReloadRegisterAfter implements regalloc.Function. +func (f *regAllocFn) ReloadRegisterAfter(v regalloc.VReg, instr *instruction) { +	m := f.m +	m.insertReloadRegisterAt(v, instr, true) +} + +// InsertMoveBefore implements regalloc.Function. +func (f *regAllocFn) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { +	f.m.insertMoveBefore(dst, src, instr) +} + +// LoopNestingForestChild implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestChild(pos *labelPosition, i int) *labelPosition { +	childSB := pos.sb.LoopNestingForestChildren()[i] +	return f.m.getOrAllocateSSABlockLabelPosition(childSB) +} + +// Succ implements regalloc.Block. 
+func (f *regAllocFn) Succ(pos *labelPosition, i int) *labelPosition { +	succSB := pos.sb.Succ(i) +	if succSB.ReturnBlock() { +		return nil +	} +	return f.m.getOrAllocateSSABlockLabelPosition(succSB) +} + +// Pred implements regalloc.Block. +func (f *regAllocFn) Pred(pos *labelPosition, i int) *labelPosition { +	predSB := pos.sb.Pred(i) +	return f.m.getOrAllocateSSABlockLabelPosition(predSB) +} + +// BlockParams implements regalloc.Function. +func (f *regAllocFn) BlockParams(pos *labelPosition, regs *[]regalloc.VReg) []regalloc.VReg { +	c := f.m.compiler +	*regs = (*regs)[:0] +	for i := 0; i < pos.sb.Params(); i++ { +		v := c.VRegOf(pos.sb.Param(i)) +		*regs = append(*regs, v) +	} +	return *regs +} + +// ID implements regalloc.Block. +func (pos *labelPosition) ID() int32 { +	return int32(pos.sb.ID()) +} + +// InstrIteratorBegin implements regalloc.Block. +func (pos *labelPosition) InstrIteratorBegin() *instruction { +	ret := pos.begin +	pos.cur = ret +	return ret +} + +// InstrIteratorNext implements regalloc.Block. +func (pos *labelPosition) InstrIteratorNext() *instruction { +	for { +		if pos.cur == pos.end { +			return nil +		} +		instr := pos.cur.next +		pos.cur = instr +		if instr == nil { +			return nil +		} else if instr.addedBeforeRegAlloc { +			// Only concerned about the instruction added before regalloc. +			return instr +		} +	} +} + +// InstrRevIteratorBegin implements regalloc.Block. +func (pos *labelPosition) InstrRevIteratorBegin() *instruction { +	pos.cur = pos.end +	return pos.cur +} + +// InstrRevIteratorNext implements regalloc.Block. +func (pos *labelPosition) InstrRevIteratorNext() *instruction { +	for { +		if pos.cur == pos.begin { +			return nil +		} +		instr := pos.cur.prev +		pos.cur = instr +		if instr == nil { +			return nil +		} else if instr.addedBeforeRegAlloc { +			// Only concerned about the instruction added before regalloc. +			return instr +		} +	} +} + +// FirstInstr implements regalloc.Block. +func (pos *labelPosition) FirstInstr() *instruction { return pos.begin } + +// LastInstrForInsertion implements regalloc.Block. +func (pos *labelPosition) LastInstrForInsertion() *instruction { +	return lastInstrForInsertion(pos.begin, pos.end) +} + +// Preds implements regalloc.Block. +func (pos *labelPosition) Preds() int { return pos.sb.Preds() } + +// Entry implements regalloc.Block. +func (pos *labelPosition) Entry() bool { return pos.sb.EntryBlock() } + +// Succs implements regalloc.Block. +func (pos *labelPosition) Succs() int { return pos.sb.Succs() } + +// LoopHeader implements regalloc.Block. +func (pos *labelPosition) LoopHeader() bool { return pos.sb.LoopHeader() } + +// LoopNestingForestChildren implements regalloc.Block. +func (pos *labelPosition) LoopNestingForestChildren() int { +	return len(pos.sb.LoopNestingForestChildren()) +} + +func (m *machine) swap(cur *instruction, x1, x2, tmp regalloc.VReg) {  	prevNext := cur.next  	var mov1, mov2, mov3 *instruction  	if x1.RegType() == regalloc.RegTypeInt { @@ -32,12 +240,12 @@ func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) {  		if !tmp.Valid() {  			r2 := x2.RealReg()  			// Temporarily spill x1 to stack. -			cur = m.InsertStoreRegisterAt(x1, cur, true).prev +			cur = m.insertStoreRegisterAt(x1, cur, true).prev  			// Then move x2 to x1.  			cur = linkInstr(cur, m.allocateInstr().asFpuMov128(x1, x2))  			linkInstr(cur, prevNext)  			// Then reload the original value on x1 from stack to r2. 
-			m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true) +			m.insertReloadRegisterAt(x1.SetRealReg(r2), cur, true)  		} else {  			mov1 = m.allocateInstr().asFpuMov128(tmp, x1)  			mov2 = m.allocateInstr().asFpuMov128(x1, x2) @@ -50,8 +258,7 @@ func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) {  	}  } -// InsertMoveBefore implements backend.RegAllocFunctionMachine. -func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { +func (m *machine) insertMoveBefore(dst, src regalloc.VReg, instr *instruction) {  	typ := src.RegType()  	if typ != dst.RegType() {  		panic("BUG: src and dst must have the same type") @@ -70,13 +277,7 @@ func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) {  	linkInstr(cur, prevNext)  } -// SSABlockLabel implements backend.RegAllocFunctionMachine. -func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label { -	return m.executableContext.SsaBlockIDToLabels[id] -} - -// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine. -func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { +func (m *machine) insertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {  	if !v.IsRealReg() {  		panic("BUG: VReg must be backed by real reg to be stored")  	} @@ -100,8 +301,7 @@ func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, aft  	return linkInstr(cur, prevNext)  } -// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine. -func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { +func (m *machine) insertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {  	if !v.IsRealReg() {  		panic("BUG: VReg must be backed by real reg to be stored")  	} @@ -134,8 +334,7 @@ func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, af  	return linkInstr(cur, prevNext)  } -// LastInstrForInsertion implements backend.RegAllocFunctionMachine. -func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction { +func lastInstrForInsertion(begin, end *instruction) *instruction {  	cur := end  	for cur.kind == nop0 {  		cur = cur.prev diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go index edb0e36e3..a72b86f6b 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go @@ -14,7 +14,7 @@ func UnwindStack(sp, _, top uintptr, returnAddresses []uintptr) []uintptr {  	var stackBuf []byte  	{ -		// TODO: use unsafe.Slice after floor version is set to Go 1.20. +		//nolint:staticcheck  		hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf))  		hdr.Data = sp  		hdr.Len = l @@ -78,13 +78,7 @@ func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {  	//              +-----------------+ <---- stackPointerBeforeGoCall  	//                 (low address)  	ptr := unsafe.Pointer(stackPointerBeforeGoCall) +	data := (*uint64)(unsafe.Add(ptr, 16)) // skips the (frame_size, sliceSize).  	size := *(*uint64)(unsafe.Add(ptr, 8)) -	var view []uint64 -	{ -		sh := (*reflect.SliceHeader)(unsafe.Pointer(&view)) -		sh.Data = uintptr(unsafe.Add(ptr, 16)) // skips the (frame_size, sliceSize). 
-	sh.Len = int(size) -	sh.Cap = int(size) -	} -	return view +	return unsafe.Slice(data, size)  }
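The stack.go and unwind_stack.go hunks above replace hand-rolled reflect.SliceHeader construction with unsafe.Add and unsafe.Slice (both available since Go 1.17). What follows is a minimal, self-contained sketch of that pattern, not code from wazero: the names view and words are invented for illustration, and the layout (a leading size-in-bytes word followed by the payload) only mimics the trampoline stack described in the GoCallStackView comments.

package main

import (
	"fmt"
	"unsafe"
)

// view returns, without copying, a []uint64 over the n words that follow *base in memory.
// unsafe.Add replaces manual pointer arithmetic such as uintptr(unsafe.Pointer(base)) + 8,
// and unsafe.Slice replaces filling in a reflect.SliceHeader by hand.
func view(base *uint64, n uint64) []uint64 {
	data := (*uint64)(unsafe.Add(unsafe.Pointer(base), 8)) // skip the size word itself
	return unsafe.Slice(data, n)
}

func main() {
	// words[0] holds the payload size in bytes; the payload occupies the following words.
	words := []uint64{3 * 8, 10, 20, 30}
	fmt.Println(view(&words[0], words[0]/8)) // [10 20 30]
}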
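Separately, the arm64 machine.go hunks above move pending-instruction buffering onto the machine itself: lowering walks each block backwards, queues instructions in pendingInstructions, and FlushPendingInstructions prepends them to the per-block head in reverse so they end up in program order. Below is a stand-alone sketch of why the reverse walk preserves order; the list types are simplified stand-ins invented for this example, not the wazero types.

package main

import "fmt"

// node and block are toy stand-ins for wazero's instruction list and labelPosition.
type node struct {
	val  string
	next *node
}

type block struct {
	head    *node
	pending []*node
}

// insertAtHead mirrors insertAtPerBlockHead: push a single node in front of the current head.
func (b *block) insertAtHead(n *node) {
	n.next = b.head
	b.head = n
}

// flush mirrors FlushPendingInstructions: iterate the pending slice backwards so that,
// after all the prepends, the pending nodes sit before the old head in their original order.
func (b *block) flush() {
	for i := len(b.pending) - 1; i >= 0; i-- {
		b.insertAtHead(b.pending[i])
	}
	b.pending = b.pending[:0]
}

func main() {
	b := &block{head: &node{val: "old-head"}}
	b.pending = append(b.pending, &node{val: "a"}, &node{val: "b"}, &node{val: "c"})
	b.flush()
	for n := b.head; n != nil; n = n.next {
		fmt.Print(n.val, " ")
	}
	fmt.Println() // prints: a b c old-head
}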
