diff options
author | 2024-08-15 00:08:55 +0000 | |
---|---|---|
committer | 2024-08-15 00:08:55 +0000 | |
commit | 09f24e044653b1327ac1c40f3ab150e3f0184f23 (patch) | |
tree | 1d9984d053fa5c8d1203abaa49b8752a1532ff11 /vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa | |
parent | update go-fastcopy to v1.1.3 (#3200) (diff) | |
download | gotosocial-09f24e044653b1327ac1c40f3ab150e3f0184f23.tar.xz |
update go-ffmpreg to v0.2.5 (pulls in latest tetratelabs/wazero) (#3203)
Diffstat (limited to 'vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa')
21 files changed, 990 insertions, 480 deletions
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go index 751050aff..96f035e58 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go @@ -14,7 +14,6 @@ var calleeSavedVRegs = []regalloc.VReg{ // CompileGoFunctionTrampoline implements backend.Machine. func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte { - ectx := m.ectx argBegin := 1 // Skips exec context by default. if needModuleContextPtr { argBegin++ @@ -25,7 +24,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * m.currentABI = abi cur := m.allocateNop() - ectx.RootInstr = cur + m.rootInstr = cur // Execution context is always the first argument. execCtrPtr := raxVReg @@ -272,7 +271,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * cur = m.revertRBPRSP(cur) linkInstr(cur, m.allocateInstr().asRet()) - m.encodeWithoutSSA(ectx.RootInstr) + m.encodeWithoutSSA(m.rootInstr) return m.c.Buf() } @@ -347,10 +346,8 @@ var stackGrowSaveVRegs = []regalloc.VReg{ // CompileStackGrowCallSequence implements backend.Machine. func (m *machine) CompileStackGrowCallSequence() []byte { - ectx := m.ectx - cur := m.allocateNop() - ectx.RootInstr = cur + m.rootInstr = cur cur = m.setupRBPRSP(cur) @@ -379,7 +376,7 @@ func (m *machine) CompileStackGrowCallSequence() []byte { cur = m.revertRBPRSP(cur) linkInstr(cur, m.allocateInstr().asRet()) - m.encodeWithoutSSA(ectx.RootInstr) + m.encodeWithoutSSA(m.rootInstr) return m.c.Buf() } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go index d27e79c0e..6a3e58f51 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go @@ -17,16 +17,6 @@ type instruction struct { kind instructionKind } -// Next implements regalloc.Instr. -func (i *instruction) Next() regalloc.Instr { - return i.next -} - -// Prev implements regalloc.Instr. -func (i *instruction) Prev() regalloc.Instr { - return i.prev -} - // IsCall implements regalloc.Instr. func (i *instruction) IsCall() bool { return i.kind == call } @@ -36,9 +26,6 @@ func (i *instruction) IsIndirectCall() bool { return i.kind == callIndirect } // IsReturn implements regalloc.Instr. func (i *instruction) IsReturn() bool { return i.kind == ret } -// AddedBeforeRegAlloc implements regalloc.Instr. -func (i *instruction) AddedBeforeRegAlloc() bool { return i.addedBeforeRegAlloc } - // String implements regalloc.Instr. func (i *instruction) String() string { switch i.kind { @@ -651,26 +638,14 @@ func resetInstruction(i *instruction) { *i = instruction{} } -func setNext(i *instruction, next *instruction) { - i.next = next -} - -func setPrev(i *instruction, prev *instruction) { - i.prev = prev -} - -func asNop(i *instruction) { - i.kind = nop0 -} - -func (i *instruction) asNop0WithLabel(label backend.Label) *instruction { //nolint +func (i *instruction) asNop0WithLabel(label label) *instruction { //nolint i.kind = nop0 i.u1 = uint64(label) return i } -func (i *instruction) nop0Label() backend.Label { - return backend.Label(i.u1) +func (i *instruction) nop0Label() label { + return label(i.u1) } type instructionKind byte @@ -1161,7 +1136,7 @@ func (i *instruction) asJmp(target operand) *instruction { return i } -func (i *instruction) jmpLabel() backend.Label { +func (i *instruction) jmpLabel() label { switch i.kind { case jmp, jmpIf, lea, xmmUnaryRmR: return i.op1.label() diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go index bee673d25..befe8c643 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go @@ -130,9 +130,9 @@ func (m *machine) lowerAddendsToAmode(x, y addend, offBase uint32) *amode { } } -func (m *machine) lowerAddend(x *backend.SSAValueDefinition) addend { - if x.IsFromBlockParam() { - return addend{x.BlkParamVReg, 0, 0} +func (m *machine) lowerAddend(x backend.SSAValueDefinition) addend { + if !x.IsFromInstr() { + return addend{m.c.VRegOf(x.V), 0, 0} } // Ensure the addend is not referenced in multiple places; we will discard nested Iadds. op := m.c.MatchInstrOneOf(x, addendsMatchOpcodes[:]) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go index 61ae6f406..aeeb6b645 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go @@ -16,18 +16,13 @@ import ( // NewBackend returns a new backend for arm64. func NewBackend() backend.Machine { - ectx := backend.NewExecutableContextT[instruction]( - resetInstruction, - setNext, - setPrev, - asNop, - ) - return &machine{ - ectx: ectx, + m := &machine{ cpuFeatures: platform.CpuFeatures, - regAlloc: regalloc.NewAllocator(regInfo), + regAlloc: regalloc.NewAllocator[*instruction, *labelPosition, *regAllocFn](regInfo), spillSlots: map[regalloc.VRegID]int64{}, amodePool: wazevoapi.NewPool[amode](nil), + labelPositionPool: wazevoapi.NewIDedPool[labelPosition](resetLabelPosition), + instrPool: wazevoapi.NewPool[instruction](resetInstruction), constSwizzleMaskConstIndex: -1, constSqmulRoundSatIndex: -1, constI8x16SHLMaskTableIndex: -1, @@ -41,23 +36,46 @@ func NewBackend() backend.Machine { constExtAddPairwiseI16x8uMask1Index: -1, constExtAddPairwiseI16x8uMask2Index: -1, } + m.regAllocFn.m = m + return m } type ( // machine implements backend.Machine for amd64. machine struct { c backend.Compiler - ectx *backend.ExecutableContextT[instruction] stackBoundsCheckDisabled bool + instrPool wazevoapi.Pool[instruction] amodePool wazevoapi.Pool[amode] cpuFeatures platform.CpuFeatureFlags - regAlloc regalloc.Allocator - regAllocFn *backend.RegAllocFunction[*instruction, *machine] + regAlloc regalloc.Allocator[*instruction, *labelPosition, *regAllocFn] + regAllocFn regAllocFn regAllocStarted bool + // labelPositionPool is the pool of labelPosition. The id is the label where + // if the label is less than the maxSSABlockID, it's the ssa.BasicBlockID. + labelPositionPool wazevoapi.IDedPool[labelPosition] + // nextLabel is the next label to be allocated. The first free label comes after maxSSABlockID + // so that we can have an identical label for the SSA block ID, which is useful for debugging. + nextLabel label + // rootInstr is the first instruction of the function. + rootInstr *instruction + // currentLabelPos is the currently-compiled ssa.BasicBlock's labelPosition. + currentLabelPos *labelPosition + // orderedSSABlockLabelPos is the ordered list of labelPosition in the generated code for each ssa.BasicBlock. + orderedSSABlockLabelPos []*labelPosition + // returnLabelPos is the labelPosition for the return block. + returnLabelPos labelPosition + // perBlockHead and perBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock. + perBlockHead, perBlockEnd *instruction + // pendingInstructions are the instructions which are not yet emitted into the instruction list. + pendingInstructions []*instruction + // maxSSABlockID is the maximum ssa.BasicBlockID in the current function. + maxSSABlockID label + spillSlotSize int64 spillSlots map[regalloc.VRegID]int64 currentABI *backend.FunctionABI @@ -67,8 +85,11 @@ type ( labelResolutionPends []labelResolutionPend + // jmpTableTargets holds the labels of the jump table targets. jmpTableTargets [][]uint32 - consts []_const + // jmpTableTargetNext is the index to the jmpTableTargets slice to be used for the next jump table. + jmpTableTargetsNext int + consts []_const constSwizzleMaskConstIndex, constSqmulRoundSatIndex, constI8x16SHLMaskTableIndex, constI8x16LogicalSHRMaskTableIndex, @@ -79,9 +100,10 @@ type ( } _const struct { - lo, hi uint64 - _var []byte - label *labelPosition + lo, hi uint64 + _var []byte + label label + labelPos *labelPosition } labelResolutionPend struct { @@ -90,22 +112,73 @@ type ( // imm32Offset is the offset of the last 4 bytes of the instruction. imm32Offset int64 } +) - labelPosition = backend.LabelPosition[instruction] +type ( + // label represents a position in the generated code which is either + // a real instruction or the constant InstructionPool (e.g. jump tables). + // + // This is exactly the same as the traditional "label" in assembly code. + label uint32 + + // labelPosition represents the regions of the generated code which the label represents. + // This implements regalloc.Block. + labelPosition struct { + // sb is not nil if this corresponds to a ssa.BasicBlock. + sb ssa.BasicBlock + // cur is used to walk through the instructions in the block during the register allocation. + cur, + // begin and end are the first and last instructions of the block. + begin, end *instruction + // binaryOffset is the offset in the binary where the label is located. + binaryOffset int64 + } ) -func (m *machine) getOrAllocateConstLabel(i *int, _var []byte) backend.Label { +// String implements backend.Machine. +func (l label) String() string { + return fmt.Sprintf("L%d", l) +} + +func resetLabelPosition(l *labelPosition) { + *l = labelPosition{} +} + +const labelReturn = math.MaxUint32 + +func ssaBlockLabel(sb ssa.BasicBlock) label { + if sb.ReturnBlock() { + return labelReturn + } + return label(sb.ID()) +} + +// getOrAllocateSSABlockLabelPosition returns the labelPosition for the given basic block. +func (m *machine) getOrAllocateSSABlockLabelPosition(sb ssa.BasicBlock) *labelPosition { + if sb.ReturnBlock() { + m.returnLabelPos.sb = sb + return &m.returnLabelPos + } + + l := ssaBlockLabel(sb) + pos := m.labelPositionPool.GetOrAllocate(int(l)) + pos.sb = sb + return pos +} + +func (m *machine) getOrAllocateConstLabel(i *int, _var []byte) label { index := *i if index == -1 { - label := m.allocateLabel() + l, pos := m.allocateLabel() index = len(m.consts) m.consts = append(m.consts, _const{ - _var: _var, - label: label, + _var: _var, + label: l, + labelPos: pos, }) *i = index } - return m.consts[index].label.L + return m.consts[index].label } // Reset implements backend.Machine. @@ -120,18 +193,20 @@ func (m *machine) Reset() { } m.stackBoundsCheckDisabled = false - m.ectx.Reset() - - m.regAllocFn.Reset() m.regAlloc.Reset() + m.labelPositionPool.Reset() + m.instrPool.Reset() m.regAllocStarted = false m.clobberedRegs = m.clobberedRegs[:0] m.spillSlotSize = 0 m.maxRequiredStackSizeForCalls = 0 + m.perBlockHead, m.perBlockEnd, m.rootInstr = nil, nil, nil + m.pendingInstructions = m.pendingInstructions[:0] + m.orderedSSABlockLabelPos = m.orderedSSABlockLabelPos[:0] m.amodePool.Reset() - m.jmpTableTargets = m.jmpTableTargets[:0] + m.jmpTableTargetsNext = 0 m.constSwizzleMaskConstIndex = -1 m.constSqmulRoundSatIndex = -1 m.constI8x16SHLMaskTableIndex = -1 @@ -146,8 +221,63 @@ func (m *machine) Reset() { m.constExtAddPairwiseI16x8uMask2Index = -1 } -// ExecutableContext implements backend.Machine. -func (m *machine) ExecutableContext() backend.ExecutableContext { return m.ectx } +// StartLoweringFunction implements backend.Machine StartLoweringFunction. +func (m *machine) StartLoweringFunction(maxBlockID ssa.BasicBlockID) { + m.maxSSABlockID = label(maxBlockID) + m.nextLabel = label(maxBlockID) + 1 +} + +// LinkAdjacentBlocks implements backend.Machine. +func (m *machine) LinkAdjacentBlocks(prev, next ssa.BasicBlock) { + prevPos, nextPos := m.getOrAllocateSSABlockLabelPosition(prev), m.getOrAllocateSSABlockLabelPosition(next) + prevPos.end.next = nextPos.begin +} + +// StartBlock implements backend.Machine. +func (m *machine) StartBlock(blk ssa.BasicBlock) { + m.currentLabelPos = m.getOrAllocateSSABlockLabelPosition(blk) + labelPos := m.currentLabelPos + end := m.allocateNop() + m.perBlockHead, m.perBlockEnd = end, end + labelPos.begin, labelPos.end = end, end + m.orderedSSABlockLabelPos = append(m.orderedSSABlockLabelPos, labelPos) +} + +// EndBlock implements ExecutableContext. +func (m *machine) EndBlock() { + // Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions. + m.insertAtPerBlockHead(m.allocateNop()) + + m.currentLabelPos.begin = m.perBlockHead + + if m.currentLabelPos.sb.EntryBlock() { + m.rootInstr = m.perBlockHead + } +} + +func (m *machine) insertAtPerBlockHead(i *instruction) { + if m.perBlockHead == nil { + m.perBlockHead = i + m.perBlockEnd = i + return + } + + i.next = m.perBlockHead + m.perBlockHead.prev = i + m.perBlockHead = i +} + +// FlushPendingInstructions implements backend.Machine. +func (m *machine) FlushPendingInstructions() { + l := len(m.pendingInstructions) + if l == 0 { + return + } + for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order. + m.insertAtPerBlockHead(m.pendingInstructions[i]) + } + m.pendingInstructions = m.pendingInstructions[:0] +} // DisableStackCheck implements backend.Machine. func (m *machine) DisableStackCheck() { m.stackBoundsCheckDisabled = true } @@ -155,23 +285,17 @@ func (m *machine) DisableStackCheck() { m.stackBoundsCheckDisabled = true } // SetCompiler implements backend.Machine. func (m *machine) SetCompiler(c backend.Compiler) { m.c = c - m.regAllocFn = backend.NewRegAllocFunction[*instruction, *machine](m, c.SSABuilder(), c) + m.regAllocFn.ssaB = c.SSABuilder() } // SetCurrentABI implements backend.Machine. -func (m *machine) SetCurrentABI(abi *backend.FunctionABI) { - m.currentABI = abi -} +func (m *machine) SetCurrentABI(abi *backend.FunctionABI) { m.currentABI = abi } // RegAlloc implements backend.Machine. func (m *machine) RegAlloc() { rf := m.regAllocFn - for _, pos := range m.ectx.OrderedBlockLabels { - rf.AddBlock(pos.SB, pos.L, pos.Begin, pos.End) - } - m.regAllocStarted = true - m.regAlloc.DoAllocation(rf) + m.regAlloc.DoAllocation(&rf) // Now that we know the final spill slot size, we must align spillSlotSize to 16 bytes. m.spillSlotSize = (m.spillSlotSize + 15) &^ 15 } @@ -184,49 +308,54 @@ func (m *machine) InsertReturn() { // LowerSingleBranch implements backend.Machine. func (m *machine) LowerSingleBranch(b *ssa.Instruction) { - ectx := m.ectx switch b.Opcode() { case ssa.OpcodeJump: - _, _, targetBlk := b.BranchData() + _, _, targetBlkID := b.BranchData() if b.IsFallthroughJump() { return } jmp := m.allocateInstr() - target := ectx.GetOrAllocateSSABlockLabel(targetBlk) - if target == backend.LabelReturn { + target := ssaBlockLabel(m.c.SSABuilder().BasicBlock(targetBlkID)) + if target == labelReturn { jmp.asRet() } else { jmp.asJmp(newOperandLabel(target)) } m.insert(jmp) case ssa.OpcodeBrTable: - index, target := b.BrTableData() - m.lowerBrTable(index, target) + index, targetBlkIDs := b.BrTableData() + m.lowerBrTable(index, targetBlkIDs) default: panic("BUG: unexpected branch opcode" + b.Opcode().String()) } } -func (m *machine) addJmpTableTarget(targets []ssa.BasicBlock) (index int) { - // TODO: reuse the slice! - labels := make([]uint32, len(targets)) - for j, target := range targets { - labels[j] = uint32(m.ectx.GetOrAllocateSSABlockLabel(target)) +func (m *machine) addJmpTableTarget(targets ssa.Values) (index int) { + if m.jmpTableTargetsNext == len(m.jmpTableTargets) { + m.jmpTableTargets = append(m.jmpTableTargets, make([]uint32, 0, len(targets.View()))) + } + + index = m.jmpTableTargetsNext + m.jmpTableTargetsNext++ + m.jmpTableTargets[index] = m.jmpTableTargets[index][:0] + for _, targetBlockID := range targets.View() { + target := m.c.SSABuilder().BasicBlock(ssa.BasicBlockID(targetBlockID)) + m.jmpTableTargets[index] = append(m.jmpTableTargets[index], uint32(ssaBlockLabel(target))) } - index = len(m.jmpTableTargets) - m.jmpTableTargets = append(m.jmpTableTargets, labels) return } var condBranchMatches = [...]ssa.Opcode{ssa.OpcodeIcmp, ssa.OpcodeFcmp} -func (m *machine) lowerBrTable(index ssa.Value, targets []ssa.BasicBlock) { +func (m *machine) lowerBrTable(index ssa.Value, targets ssa.Values) { _v := m.getOperand_Reg(m.c.ValueDefinition(index)) v := m.copyToTmp(_v.reg()) + targetCount := len(targets.View()) + // First, we need to do the bounds check. maxIndex := m.c.AllocateVReg(ssa.TypeI32) - m.lowerIconst(maxIndex, uint64(len(targets)-1), false) + m.lowerIconst(maxIndex, uint64(targetCount-1), false) cmp := m.allocateInstr().asCmpRmiR(true, newOperandReg(maxIndex), v, false) m.insert(cmp) @@ -255,23 +384,22 @@ func (m *machine) lowerBrTable(index ssa.Value, targets []ssa.BasicBlock) { jmpTable := m.allocateInstr() targetSliceIndex := m.addJmpTableTarget(targets) - jmpTable.asJmpTableSequence(targetSliceIndex, len(targets)) + jmpTable.asJmpTableSequence(targetSliceIndex, targetCount) m.insert(jmpTable) } // LowerConditionalBranch implements backend.Machine. func (m *machine) LowerConditionalBranch(b *ssa.Instruction) { - exctx := m.ectx - cval, args, targetBlk := b.BranchData() + cval, args, targetBlkID := b.BranchData() if len(args) > 0 { panic(fmt.Sprintf( "conditional branch shouldn't have args; likely a bug in critical edge splitting: from %s to %s", - exctx.CurrentSSABlk, - targetBlk, + m.currentLabelPos.sb, + targetBlkID, )) } - target := exctx.GetOrAllocateSSABlockLabel(targetBlk) + target := ssaBlockLabel(m.c.SSABuilder().BasicBlock(targetBlkID)) cvalDef := m.c.ValueDefinition(cval) switch m.c.MatchInstrOneOf(cvalDef, condBranchMatches[:]) { @@ -1272,9 +1400,9 @@ func (m *machine) lowerVconst(dst regalloc.VReg, lo, hi uint64) { } load := m.allocateInstr() - constLabel := m.allocateLabel() - m.consts = append(m.consts, _const{label: constLabel, lo: lo, hi: hi}) - load.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(constLabel.L)), dst) + l, pos := m.allocateLabel() + m.consts = append(m.consts, _const{label: l, labelPos: pos, lo: lo, hi: hi}) + load.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(l)), dst) m.insert(load) } @@ -1473,21 +1601,24 @@ func (m *machine) lowerExitIfTrueWithCode(execCtx regalloc.VReg, cond ssa.Value, jmpIf.asJmpIf(condFromSSAIntCmpCond(c).invert(), newOperandLabel(l)) } -func (m *machine) tryLowerBandToFlag(x, y *backend.SSAValueDefinition) (ok bool) { - var target *backend.SSAValueDefinition +func (m *machine) tryLowerBandToFlag(x, y backend.SSAValueDefinition) (ok bool) { + var target backend.SSAValueDefinition + var got bool if x.IsFromInstr() && x.Instr.Constant() && x.Instr.ConstantVal() == 0 { if m.c.MatchInstr(y, ssa.OpcodeBand) { target = y + got = true } } if y.IsFromInstr() && y.Instr.Constant() && y.Instr.ConstantVal() == 0 { if m.c.MatchInstr(x, ssa.OpcodeBand) { target = x + got = true } } - if target == nil { + if !got { return false } @@ -1522,7 +1653,7 @@ func (m *machine) allocateExitInstructions(execCtx, exitCodeReg regalloc.VReg) ( return } -func (m *machine) lowerExitWithCode(execCtx regalloc.VReg, code wazevoapi.ExitCode) (afterLabel backend.Label) { +func (m *machine) lowerExitWithCode(execCtx regalloc.VReg, code wazevoapi.ExitCode) (afterLabel label) { exitCodeReg := rbpVReg saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtx, exitCodeReg) @@ -1819,9 +1950,9 @@ func (m *machine) lowerCall(si *ssa.Instruction) { // callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the // caller side of the function call. -func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, stackSlotSize int64) { +func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def backend.SSAValueDefinition, stackSlotSize int64) { arg := &a.Args[argIndex] - if def != nil && def.IsFromInstr() { + if def.IsFromInstr() { // Constant instructions are inlined. if inst := def.Instr; inst.Constant() { m.insertLoadConstant(inst, reg) @@ -1904,25 +2035,20 @@ func (m *machine) InsertMove(dst, src regalloc.VReg, typ ssa.Type) { // Format implements backend.Machine. func (m *machine) Format() string { - ectx := m.ectx - begins := map[*instruction]backend.Label{} - for _, pos := range ectx.LabelPositions { + begins := map[*instruction]label{} + for l := label(0); l < m.nextLabel; l++ { + pos := m.labelPositionPool.Get(int(l)) if pos != nil { - begins[pos.Begin] = pos.L + begins[pos.begin] = l } } - irBlocks := map[backend.Label]ssa.BasicBlockID{} - for i, l := range ectx.SsaBlockIDToLabels { - irBlocks[l] = ssa.BasicBlockID(i) - } - var lines []string - for cur := ectx.RootInstr; cur != nil; cur = cur.next { + for cur := m.rootInstr; cur != nil; cur = cur.next { if l, ok := begins[cur]; ok { var labelStr string - if blkID, ok := irBlocks[l]; ok { - labelStr = fmt.Sprintf("%s (SSA Block: %s):", l, blkID) + if l <= m.maxSSABlockID { + labelStr = fmt.Sprintf("%s (SSA Block: blk%d):", l, l) } else { labelStr = fmt.Sprintf("%s:", l) } @@ -1935,9 +2061,9 @@ func (m *machine) Format() string { } for _, vc := range m.consts { if vc._var == nil { - lines = append(lines, fmt.Sprintf("%s: const [%d %d]", vc.label.L, vc.lo, vc.hi)) + lines = append(lines, fmt.Sprintf("%s: const [%d %d]", vc.label, vc.lo, vc.hi)) } else { - lines = append(lines, fmt.Sprintf("%s: const %#x", vc.label.L, vc._var)) + lines = append(lines, fmt.Sprintf("%s: const %#x", vc.label, vc._var)) } } return "\n" + strings.Join(lines, "\n") + "\n" @@ -1945,18 +2071,14 @@ func (m *machine) Format() string { func (m *machine) encodeWithoutSSA(root *instruction) { m.labelResolutionPends = m.labelResolutionPends[:0] - ectx := m.ectx - bufPtr := m.c.BufPtr() for cur := root; cur != nil; cur = cur.next { offset := int64(len(*bufPtr)) if cur.kind == nop0 { l := cur.nop0Label() - if int(l) >= len(ectx.LabelPositions) { - continue - } - if pos := ectx.LabelPositions[l]; pos != nil { - pos.BinaryOffset = offset + pos := m.labelPositionPool.Get(int(l)) + if pos != nil { + pos.binaryOffset = offset } } @@ -1973,7 +2095,7 @@ func (m *machine) encodeWithoutSSA(root *instruction) { switch p.instr.kind { case jmp, jmpIf, lea: target := p.instr.jmpLabel() - targetOffset := ectx.LabelPositions[target].BinaryOffset + targetOffset := m.labelPositionPool.Get(int(target)).binaryOffset imm32Offset := p.imm32Offset jmpOffset := int32(targetOffset - (p.imm32Offset + 4)) // +4 because RIP points to the next instruction. binary.LittleEndian.PutUint32((*bufPtr)[imm32Offset:], uint32(jmpOffset)) @@ -1985,33 +2107,33 @@ func (m *machine) encodeWithoutSSA(root *instruction) { // Encode implements backend.Machine Encode. func (m *machine) Encode(ctx context.Context) (err error) { - ectx := m.ectx bufPtr := m.c.BufPtr() var fn string var fnIndex int - var labelToSSABlockID map[backend.Label]ssa.BasicBlockID + var labelPosToLabel map[*labelPosition]label if wazevoapi.PerfMapEnabled { fn = wazevoapi.GetCurrentFunctionName(ctx) - labelToSSABlockID = make(map[backend.Label]ssa.BasicBlockID) - for i, l := range ectx.SsaBlockIDToLabels { - labelToSSABlockID[l] = ssa.BasicBlockID(i) + labelPosToLabel = make(map[*labelPosition]label) + for i := 0; i <= m.labelPositionPool.MaxIDEncountered(); i++ { + pos := m.labelPositionPool.Get(i) + labelPosToLabel[pos] = label(i) } fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx) } m.labelResolutionPends = m.labelResolutionPends[:0] - for _, pos := range ectx.OrderedBlockLabels { + for _, pos := range m.orderedSSABlockLabelPos { offset := int64(len(*bufPtr)) - pos.BinaryOffset = offset - for cur := pos.Begin; cur != pos.End.next; cur = cur.next { + pos.binaryOffset = offset + for cur := pos.begin; cur != pos.end.next; cur = cur.next { offset := int64(len(*bufPtr)) switch cur.kind { case nop0: l := cur.nop0Label() - if pos := ectx.LabelPositions[l]; pos != nil { - pos.BinaryOffset = offset + if pos := m.labelPositionPool.Get(int(l)); pos != nil { + pos.binaryOffset = offset } case sourceOffsetInfo: m.c.AddSourceOffsetInfo(offset, cur.sourceOffsetInfo()) @@ -2026,22 +2148,16 @@ func (m *machine) Encode(ctx context.Context) (err error) { } if wazevoapi.PerfMapEnabled { - l := pos.L - var labelStr string - if blkID, ok := labelToSSABlockID[l]; ok { - labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID) - } else { - labelStr = l.String() - } + l := labelPosToLabel[pos] size := int64(len(*bufPtr)) - offset - wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr)) + wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, l)) } } for i := range m.consts { offset := int64(len(*bufPtr)) vc := &m.consts[i] - vc.label.BinaryOffset = offset + vc.labelPos.binaryOffset = offset if vc._var == nil { lo, hi := vc.lo, vc.hi m.c.Emit8Bytes(lo) @@ -2059,7 +2175,7 @@ func (m *machine) Encode(ctx context.Context) (err error) { switch p.instr.kind { case jmp, jmpIf, lea, xmmUnaryRmR: target := p.instr.jmpLabel() - targetOffset := ectx.LabelPositions[target].BinaryOffset + targetOffset := m.labelPositionPool.Get(int(target)).binaryOffset imm32Offset := p.imm32Offset jmpOffset := int32(targetOffset - (p.imm32Offset + 4)) // +4 because RIP points to the next instruction. binary.LittleEndian.PutUint32(buf[imm32Offset:], uint32(jmpOffset)) @@ -2068,7 +2184,7 @@ func (m *machine) Encode(ctx context.Context) (err error) { // Each entry is the offset from the beginning of the jmpTableIsland instruction in 8 bytes. targets := m.jmpTableTargets[p.instr.u1] for i, l := range targets { - targetOffset := ectx.LabelPositions[backend.Label(l)].BinaryOffset + targetOffset := m.labelPositionPool.Get(int(l)).binaryOffset jmpOffset := targetOffset - tableBegin binary.LittleEndian.PutUint64(buf[tableBegin+int64(i)*8:], uint64(jmpOffset)) } @@ -2097,7 +2213,7 @@ func (m *machine) ResolveRelocations(refToBinaryOffset []int, binary []byte, rel // CallTrampolineIslandInfo implements backend.Machine CallTrampolineIslandInfo. func (m *machine) CallTrampolineIslandInfo(_ int) (_, _ int, _ error) { return } -func (m *machine) lowerIcmpToFlag(xd, yd *backend.SSAValueDefinition, _64 bool) { +func (m *machine) lowerIcmpToFlag(xd, yd backend.SSAValueDefinition, _64 bool) { x := m.getOperand_Reg(xd) y := m.getOperand_Mem_Imm32_Reg(yd) cmp := m.allocateInstr().asCmpRmiR(true, y, x.reg(), _64) @@ -2140,7 +2256,7 @@ func (m *machine) lowerFcmpToFlags(instr *ssa.Instruction) (f1, f2 cond, and boo // allocateInstr allocates an instruction. func (m *machine) allocateInstr() *instruction { - instr := m.ectx.InstructionPool.Allocate() + instr := m.instrPool.Allocate() if !m.regAllocStarted { instr.addedBeforeRegAlloc = true } @@ -2154,24 +2270,22 @@ func (m *machine) allocateNop() *instruction { } func (m *machine) insert(i *instruction) { - ectx := m.ectx - ectx.PendingInstructions = append(ectx.PendingInstructions, i) + m.pendingInstructions = append(m.pendingInstructions, i) } -func (m *machine) allocateBrTarget() (nop *instruction, l backend.Label) { //nolint - pos := m.allocateLabel() - l = pos.L +func (m *machine) allocateBrTarget() (nop *instruction, l label) { //nolint + l, pos := m.allocateLabel() nop = m.allocateInstr() nop.asNop0WithLabel(l) - pos.Begin, pos.End = nop, nop + pos.begin, pos.end = nop, nop return } -func (m *machine) allocateLabel() *labelPosition { - ectx := m.ectx - l := ectx.AllocateLabel() - pos := ectx.GetOrAllocateLabelPosition(l) - return pos +func (m *machine) allocateLabel() (label, *labelPosition) { + l := m.nextLabel + pos := m.labelPositionPool.GetOrAllocate(int(l)) + m.nextLabel++ + return l, pos } func (m *machine) getVRegSpillSlotOffsetFromSP(id regalloc.VRegID, size byte) int64 { @@ -3185,22 +3299,22 @@ func (m *machine) lowerShuffle(x, y ssa.Value, lo, hi uint64, ret ssa.Value) { } } - xmaskLabel := m.allocateLabel() - m.consts = append(m.consts, _const{lo: xMask[0], hi: xMask[1], label: xmaskLabel}) - ymaskLabel := m.allocateLabel() - m.consts = append(m.consts, _const{lo: yMask[0], hi: yMask[1], label: ymaskLabel}) + xl, xmaskPos := m.allocateLabel() + m.consts = append(m.consts, _const{lo: xMask[0], hi: xMask[1], label: xl, labelPos: xmaskPos}) + yl, ymaskPos := m.allocateLabel() + m.consts = append(m.consts, _const{lo: yMask[0], hi: yMask[1], label: yl, labelPos: ymaskPos}) xx, yy := m.getOperand_Reg(m.c.ValueDefinition(x)), m.getOperand_Reg(m.c.ValueDefinition(y)) tmpX, tmpY := m.copyToTmp(xx.reg()), m.copyToTmp(yy.reg()) // Apply mask to X. tmp := m.c.AllocateVReg(ssa.TypeV128) - loadMaskLo := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(xmaskLabel.L)), tmp) + loadMaskLo := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(xl)), tmp) m.insert(loadMaskLo) m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmp), tmpX)) // Apply mask to Y. - loadMaskHi := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(ymaskLabel.L)), tmp) + loadMaskHi := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(yl)), tmp) m.insert(loadMaskHi) m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmp), tmpY)) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go index 8fa974c66..e53729860 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go @@ -12,7 +12,7 @@ func (m *machine) PostRegAlloc() { } func (m *machine) setupPrologue() { - cur := m.ectx.RootInstr + cur := m.rootInstr prevInitInst := cur.next // At this point, we have the stack layout as follows: @@ -130,14 +130,13 @@ func (m *machine) setupPrologue() { // 3. Inserts the dec/inc RSP instruction right before/after the call instruction. // 4. Lowering that is supposed to be done after regalloc. func (m *machine) postRegAlloc() { - ectx := m.ectx - for cur := ectx.RootInstr; cur != nil; cur = cur.next { + for cur := m.rootInstr; cur != nil; cur = cur.next { switch k := cur.kind; k { case ret: m.setupEpilogueAfter(cur.prev) continue case fcvtToSintSequence, fcvtToUintSequence: - m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] if k == fcvtToSintSequence { m.lowerFcvtToSintSequenceAfterRegalloc(cur) } else { @@ -146,29 +145,29 @@ func (m *machine) postRegAlloc() { prev := cur.prev next := cur.next cur := prev - for _, instr := range m.ectx.PendingInstructions { + for _, instr := range m.pendingInstructions { cur = linkInstr(cur, instr) } linkInstr(cur, next) continue case xmmCMov: - m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] m.lowerXmmCmovAfterRegAlloc(cur) prev := cur.prev next := cur.next cur := prev - for _, instr := range m.ectx.PendingInstructions { + for _, instr := range m.pendingInstructions { cur = linkInstr(cur, instr) } linkInstr(cur, next) continue case idivRemSequence: - m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] m.lowerIDivRemSequenceAfterRegAlloc(cur) prev := cur.prev next := cur.next cur := prev - for _, instr := range m.ectx.PendingInstructions { + for _, instr := range m.pendingInstructions { cur = linkInstr(cur, instr) } linkInstr(cur, next) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go index 0bb28ee9e..de9dcc944 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go @@ -1,13 +1,226 @@ package amd64 import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" ) -// InsertMoveBefore implements backend.RegAllocFunctionMachine. -func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { +// regAllocFn implements regalloc.Function. +type regAllocFn struct { + ssaB ssa.Builder + m *machine + loopNestingForestRoots []ssa.BasicBlock + blockIter int +} + +// PostOrderBlockIteratorBegin implements regalloc.Function. +func (f *regAllocFn) PostOrderBlockIteratorBegin() *labelPosition { + f.blockIter = len(f.m.orderedSSABlockLabelPos) - 1 + return f.PostOrderBlockIteratorNext() +} + +// PostOrderBlockIteratorNext implements regalloc.Function. +func (f *regAllocFn) PostOrderBlockIteratorNext() *labelPosition { + if f.blockIter < 0 { + return nil + } + b := f.m.orderedSSABlockLabelPos[f.blockIter] + f.blockIter-- + return b +} + +// ReversePostOrderBlockIteratorBegin implements regalloc.Function. +func (f *regAllocFn) ReversePostOrderBlockIteratorBegin() *labelPosition { + f.blockIter = 0 + return f.ReversePostOrderBlockIteratorNext() +} + +// ReversePostOrderBlockIteratorNext implements regalloc.Function. +func (f *regAllocFn) ReversePostOrderBlockIteratorNext() *labelPosition { + if f.blockIter >= len(f.m.orderedSSABlockLabelPos) { + return nil + } + b := f.m.orderedSSABlockLabelPos[f.blockIter] + f.blockIter++ + return b +} + +// ClobberedRegisters implements regalloc.Function. +func (f *regAllocFn) ClobberedRegisters(regs []regalloc.VReg) { + f.m.clobberedRegs = append(f.m.clobberedRegs[:0], regs...) +} + +// LoopNestingForestRoots implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestRoots() int { + f.loopNestingForestRoots = f.ssaB.LoopNestingForestRoots() + return len(f.loopNestingForestRoots) +} + +// LoopNestingForestRoot implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestRoot(i int) *labelPosition { + root := f.loopNestingForestRoots[i] + pos := f.m.getOrAllocateSSABlockLabelPosition(root) + return pos +} + +// LowestCommonAncestor implements regalloc.Function. +func (f *regAllocFn) LowestCommonAncestor(blk1, blk2 *labelPosition) *labelPosition { + sb := f.ssaB.LowestCommonAncestor(blk1.sb, blk2.sb) + pos := f.m.getOrAllocateSSABlockLabelPosition(sb) + return pos +} + +// Idom implements regalloc.Function. +func (f *regAllocFn) Idom(blk *labelPosition) *labelPosition { + sb := f.ssaB.Idom(blk.sb) + pos := f.m.getOrAllocateSSABlockLabelPosition(sb) + return pos +} + +// SwapBefore implements regalloc.Function. +func (f *regAllocFn) SwapBefore(x1, x2, tmp regalloc.VReg, instr *instruction) { + f.m.swap(instr.prev, x1, x2, tmp) +} + +// StoreRegisterBefore implements regalloc.Function. +func (f *regAllocFn) StoreRegisterBefore(v regalloc.VReg, instr *instruction) { + m := f.m + m.insertStoreRegisterAt(v, instr, false) +} + +// StoreRegisterAfter implements regalloc.Function. +func (f *regAllocFn) StoreRegisterAfter(v regalloc.VReg, instr *instruction) { + m := f.m + m.insertStoreRegisterAt(v, instr, true) +} + +// ReloadRegisterBefore implements regalloc.Function. +func (f *regAllocFn) ReloadRegisterBefore(v regalloc.VReg, instr *instruction) { + m := f.m + m.insertReloadRegisterAt(v, instr, false) +} + +// ReloadRegisterAfter implements regalloc.Function. +func (f *regAllocFn) ReloadRegisterAfter(v regalloc.VReg, instr *instruction) { + m := f.m + m.insertReloadRegisterAt(v, instr, true) +} + +// InsertMoveBefore implements regalloc.Function. +func (f *regAllocFn) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { + f.m.insertMoveBefore(dst, src, instr) +} + +// LoopNestingForestChild implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestChild(pos *labelPosition, i int) *labelPosition { + childSB := pos.sb.LoopNestingForestChildren()[i] + return f.m.getOrAllocateSSABlockLabelPosition(childSB) +} + +// Succ implements regalloc.Block. +func (f *regAllocFn) Succ(pos *labelPosition, i int) *labelPosition { + succSB := pos.sb.Succ(i) + if succSB.ReturnBlock() { + return nil + } + return f.m.getOrAllocateSSABlockLabelPosition(succSB) +} + +// Pred implements regalloc.Block. +func (f *regAllocFn) Pred(pos *labelPosition, i int) *labelPosition { + predSB := pos.sb.Pred(i) + return f.m.getOrAllocateSSABlockLabelPosition(predSB) +} + +// BlockParams implements regalloc.Function. +func (f *regAllocFn) BlockParams(pos *labelPosition, regs *[]regalloc.VReg) []regalloc.VReg { + c := f.m.c + *regs = (*regs)[:0] + for i := 0; i < pos.sb.Params(); i++ { + v := c.VRegOf(pos.sb.Param(i)) + *regs = append(*regs, v) + } + return *regs +} + +// ID implements regalloc.Block. +func (pos *labelPosition) ID() int32 { + return int32(pos.sb.ID()) +} + +// InstrIteratorBegin implements regalloc.Block. +func (pos *labelPosition) InstrIteratorBegin() *instruction { + ret := pos.begin + pos.cur = ret + return ret +} + +// InstrIteratorNext implements regalloc.Block. +func (pos *labelPosition) InstrIteratorNext() *instruction { + for { + if pos.cur == pos.end { + return nil + } + instr := pos.cur.next + pos.cur = instr + if instr == nil { + return nil + } else if instr.addedBeforeRegAlloc { + // Only concerned about the instruction added before regalloc. + return instr + } + } +} + +// InstrRevIteratorBegin implements regalloc.Block. +func (pos *labelPosition) InstrRevIteratorBegin() *instruction { + pos.cur = pos.end + return pos.cur +} + +// InstrRevIteratorNext implements regalloc.Block. +func (pos *labelPosition) InstrRevIteratorNext() *instruction { + for { + if pos.cur == pos.begin { + return nil + } + instr := pos.cur.prev + pos.cur = instr + if instr == nil { + return nil + } else if instr.addedBeforeRegAlloc { + // Only concerned about the instruction added before regalloc. + return instr + } + } +} + +// FirstInstr implements regalloc.Block. +func (pos *labelPosition) FirstInstr() *instruction { return pos.begin } + +// LastInstrForInsertion implements regalloc.Block. +func (pos *labelPosition) LastInstrForInsertion() *instruction { + return lastInstrForInsertion(pos.begin, pos.end) +} + +// Preds implements regalloc.Block. +func (pos *labelPosition) Preds() int { return pos.sb.Preds() } + +// Entry implements regalloc.Block. +func (pos *labelPosition) Entry() bool { return pos.sb.EntryBlock() } + +// Succs implements regalloc.Block. +func (pos *labelPosition) Succs() int { return pos.sb.Succs() } + +// LoopHeader implements regalloc.Block. +func (pos *labelPosition) LoopHeader() bool { return pos.sb.LoopHeader() } + +// LoopNestingForestChildren implements regalloc.Block. +func (pos *labelPosition) LoopNestingForestChildren() int { + return len(pos.sb.LoopNestingForestChildren()) +} + +func (m *machine) insertMoveBefore(dst, src regalloc.VReg, instr *instruction) { typ := src.RegType() if typ != dst.RegType() { panic("BUG: src and dst must have the same type") @@ -26,8 +239,7 @@ func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { linkInstr(cur, prevNext) } -// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine. -func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { +func (m *machine) insertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { if !v.IsRealReg() { panic("BUG: VReg must be backed by real reg to be stored") } @@ -61,8 +273,7 @@ func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, aft return linkInstr(cur, prevNext) } -// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine. -func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { +func (m *machine) insertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { if !v.IsRealReg() { panic("BUG: VReg must be backed by real reg to be stored") } @@ -98,13 +309,7 @@ func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, af return linkInstr(cur, prevNext) } -// ClobberedRegisters implements backend.RegAllocFunctionMachine. -func (m *machine) ClobberedRegisters(regs []regalloc.VReg) { - m.clobberedRegs = append(m.clobberedRegs[:0], regs...) -} - -// Swap implements backend.RegAllocFunctionMachine. -func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) { +func (m *machine) swap(cur *instruction, x1, x2, tmp regalloc.VReg) { if x1.RegType() == regalloc.RegTypeInt { prevNext := cur.next xc := m.allocateInstr().asXCHG(x1, newOperandReg(x2), 8) @@ -113,25 +318,24 @@ func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) { } else { if tmp.Valid() { prevNext := cur.next - m.InsertMoveBefore(tmp, x1, prevNext) - m.InsertMoveBefore(x1, x2, prevNext) - m.InsertMoveBefore(x2, tmp, prevNext) + m.insertMoveBefore(tmp, x1, prevNext) + m.insertMoveBefore(x1, x2, prevNext) + m.insertMoveBefore(x2, tmp, prevNext) } else { prevNext := cur.next r2 := x2.RealReg() // Temporarily spill x1 to stack. - cur = m.InsertStoreRegisterAt(x1, cur, true).prev + cur = m.insertStoreRegisterAt(x1, cur, true).prev // Then move x2 to x1. cur = linkInstr(cur, m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqa, newOperandReg(x2), x1)) linkInstr(cur, prevNext) // Then reload the original value on x1 from stack to r2. - m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true) + m.insertReloadRegisterAt(x1.SetRealReg(r2), cur, true) } } } -// LastInstrForInsertion implements backend.RegAllocFunctionMachine. -func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction { +func lastInstrForInsertion(begin, end *instruction) *instruction { cur := end for cur.kind == nop0 { cur = cur.prev @@ -146,8 +350,3 @@ func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction { return end } } - -// SSABlockLabel implements backend.RegAllocFunctionMachine. -func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label { - return m.ectx.SsaBlockIDToLabels[id] -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go index 539a8b754..8d514d857 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go @@ -127,7 +127,7 @@ func (m *machine) lowerSqmulRoundSat(x, y, ret ssa.Value) { tmpX := m.copyToTmp(xx.reg()) m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmulhrsw, yy, tmpX)) - m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmpX), tmp)) + m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqw, newOperandReg(tmpX), tmp)) m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmpX)) m.copyTo(tmpX, m.c.VRegOf(ret)) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go index c6fcb8673..787975683 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go @@ -59,7 +59,7 @@ func (o *operand) format(_64 bool) string { case operandKindImm32: return fmt.Sprintf("$%d", int32(o.imm32())) case operandKindLabel: - return backend.Label(o.imm32()).String() + return label(o.imm32()).String() default: panic(fmt.Sprintf("BUG: invalid operand: %s", o.kind)) } @@ -85,22 +85,22 @@ func (o *operand) imm32() uint32 { return uint32(o.data) } -func (o *operand) label() backend.Label { +func (o *operand) label() label { switch o.kind { case operandKindLabel: - return backend.Label(o.data) + return label(o.data) case operandKindMem: mem := o.addressMode() if mem.kind() != amodeRipRel { panic("BUG: invalid label") } - return backend.Label(mem.imm32) + return label(mem.imm32) default: panic("BUG: invalid operand kind") } } -func newOperandLabel(label backend.Label) operand { +func newOperandLabel(label label) operand { return operand{kind: operandKindLabel, data: uint64(label)} } @@ -221,7 +221,7 @@ func (m *machine) newAmodeRegRegShift(imm32 uint32, base, index regalloc.VReg, s return ret } -func (m *machine) newAmodeRipRel(label backend.Label) *amode { +func (m *machine) newAmodeRipRel(label label) *amode { ret := m.amodePool.Allocate() *ret = amode{kindWithShift: uint32(amodeRipRel), imm32: uint32(label)} return ret @@ -246,18 +246,18 @@ func (a *amode) String() string { "%d(%s,%s,%d)", int32(a.imm32), formatVRegSized(a.base, true), formatVRegSized(a.index, true), shift) case amodeRipRel: - return fmt.Sprintf("%s(%%rip)", backend.Label(a.imm32)) + return fmt.Sprintf("%s(%%rip)", label(a.imm32)) default: panic("BUG: invalid amode kind") } } -func (m *machine) getOperand_Mem_Reg(def *backend.SSAValueDefinition) (op operand) { - if def.IsFromBlockParam() { - return newOperandReg(def.BlkParamVReg) +func (m *machine) getOperand_Mem_Reg(def backend.SSAValueDefinition) (op operand) { + if !def.IsFromInstr() { + return newOperandReg(m.c.VRegOf(def.V)) } - if def.SSAValue().Type() == ssa.TypeV128 { + if def.V.Type() == ssa.TypeV128 { // SIMD instructions require strict memory alignment, so we don't support the memory operand for V128 at the moment. return m.getOperand_Reg(def) } @@ -272,9 +272,9 @@ func (m *machine) getOperand_Mem_Reg(def *backend.SSAValueDefinition) (op operan return m.getOperand_Reg(def) } -func (m *machine) getOperand_Mem_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) { - if def.IsFromBlockParam() { - return newOperandReg(def.BlkParamVReg) +func (m *machine) getOperand_Mem_Imm32_Reg(def backend.SSAValueDefinition) (op operand) { + if !def.IsFromInstr() { + return newOperandReg(m.c.VRegOf(def.V)) } if m.c.MatchInstr(def, ssa.OpcodeLoad) { @@ -287,9 +287,9 @@ func (m *machine) getOperand_Mem_Imm32_Reg(def *backend.SSAValueDefinition) (op return m.getOperand_Imm32_Reg(def) } -func (m *machine) getOperand_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) { - if def.IsFromBlockParam() { - return newOperandReg(def.BlkParamVReg) +func (m *machine) getOperand_Imm32_Reg(def backend.SSAValueDefinition) (op operand) { + if !def.IsFromInstr() { + return newOperandReg(m.c.VRegOf(def.V)) } instr := def.Instr @@ -323,24 +323,14 @@ func asImm32(val uint64, allowSignExt bool) (uint32, bool) { return u32val, true } -func (m *machine) getOperand_Reg(def *backend.SSAValueDefinition) (op operand) { +func (m *machine) getOperand_Reg(def backend.SSAValueDefinition) (op operand) { var v regalloc.VReg - if def.IsFromBlockParam() { - v = def.BlkParamVReg + if instr := def.Instr; instr != nil && instr.Constant() { + // We inline all the constant instructions so that we could reduce the register usage. + v = m.lowerConstant(instr) + instr.MarkLowered() } else { - instr := def.Instr - if instr.Constant() { - // We inline all the constant instructions so that we could reduce the register usage. - v = m.lowerConstant(instr) - instr.MarkLowered() - } else { - if n := def.N; n == 0 { - v = m.c.VRegOf(instr.Return()) - } else { - _, rs := instr.Returns() - v = m.c.VRegOf(rs[n-1]) - } - } + v = m.c.VRegOf(def.V) } return newOperandReg(v) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go deleted file mode 100644 index 5219837e3..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build !tinygo - -package amd64 - -import "reflect" - -// setSliceLimits sets both Cap and Len for the given reflected slice. -func setSliceLimits(s *reflect.SliceHeader, limit uintptr) { - s.Len = int(limit) - s.Cap = int(limit) -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go deleted file mode 100644 index df4cf46ec..000000000 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build tinygo - -package amd64 - -import "reflect" - -// setSliceLimits sets both Cap and Len for the given reflected slice. -func setSliceLimits(s *reflect.SliceHeader, limit uintptr) { - s.Len = limit - s.Len = limit -} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go index 05ba5f027..ef823bdbd 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go @@ -9,12 +9,14 @@ import ( ) func stackView(rbp, top uintptr) []byte { + l := int(top - rbp) var stackBuf []byte { - // TODO: use unsafe.Slice after floor version is set to Go 1.20. + //nolint:staticcheck hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf)) hdr.Data = rbp - setSliceLimits(hdr, top-rbp) + hdr.Len = l + hdr.Cap = l } return stackBuf } @@ -72,9 +74,9 @@ func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 { // | SizeInBytes | // +-----------------+ <---- stackPointerBeforeGoCall // (low address) - data := unsafe.Pointer(uintptr(unsafe.Pointer(stackPointerBeforeGoCall)) + 8) + data := unsafe.Add(unsafe.Pointer(stackPointerBeforeGoCall), 8) size := *stackPointerBeforeGoCall / 8 - return unsafe.Slice((*uint64)(data), int(size)) + return unsafe.Slice((*uint64)(data), size) } func AdjustClonedStack(oldRsp, oldTop, rsp, rbp, top uintptr) { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go index 4eaa13ce1..d1eaa7cd4 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go @@ -182,9 +182,9 @@ func (m *machine) LowerReturns(rets []ssa.Value) { // callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the // caller side of the function call. -func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, slotBegin int64) { +func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def backend.SSAValueDefinition, slotBegin int64) { arg := &a.Args[argIndex] - if def != nil && def.IsFromInstr() { + if def.IsFromInstr() { // Constant instructions are inlined. if inst := def.Instr; inst.Constant() { val := inst.Return() @@ -228,10 +228,9 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i } func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, *addressMode) { - exct := m.executableContext - exct.PendingInstructions = exct.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse) - for _, instr := range exct.PendingInstructions { + for _, instr := range m.pendingInstructions { cur = linkInstr(cur, instr) } return cur, mode diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go index 99e6bb482..06f8a4a05 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go @@ -14,7 +14,6 @@ var calleeSavedRegistersSorted = []regalloc.VReg{ // CompileGoFunctionTrampoline implements backend.Machine. func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte { - exct := m.executableContext argBegin := 1 // Skips exec context by default. if needModuleContextPtr { argBegin++ @@ -26,7 +25,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * cur := m.allocateInstr() cur.asNop0() - exct.RootInstr = cur + m.rootInstr = cur // Execution context is always the first argument. execCtrPtr := x0VReg @@ -244,7 +243,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * ret.asRet() linkInstr(cur, ret) - m.encode(m.executableContext.RootInstr) + m.encode(m.rootInstr) return m.compiler.Buf() } @@ -302,20 +301,18 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re } func (m *machine) lowerConstantI64AndInsert(cur *instruction, dst regalloc.VReg, v int64) *instruction { - exct := m.executableContext - exct.PendingInstructions = exct.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] m.lowerConstantI64(dst, v) - for _, instr := range exct.PendingInstructions { + for _, instr := range m.pendingInstructions { cur = linkInstr(cur, instr) } return cur } func (m *machine) lowerConstantI32AndInsert(cur *instruction, dst regalloc.VReg, v int32) *instruction { - exct := m.executableContext - exct.PendingInstructions = exct.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] m.lowerConstantI32(dst, v) - for _, instr := range exct.PendingInstructions { + for _, instr := range m.pendingInstructions { cur = linkInstr(cur, instr) } return cur diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go index 7121cb538..1f563428a 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -36,18 +36,6 @@ type ( instructionKind byte ) -func asNop0(i *instruction) { - i.kind = nop0 -} - -func setNext(i, next *instruction) { - i.next = next -} - -func setPrev(i, prev *instruction) { - i.prev = prev -} - // IsCall implements regalloc.Instr IsCall. func (i *instruction) IsCall() bool { return i.kind == call @@ -63,21 +51,6 @@ func (i *instruction) IsReturn() bool { return i.kind == ret } -// Next implements regalloc.Instr Next. -func (i *instruction) Next() regalloc.Instr { - return i.next -} - -// Prev implements regalloc.Instr Prev. -func (i *instruction) Prev() regalloc.Instr { - return i.prev -} - -// AddedBeforeRegAlloc implements regalloc.Instr AddedBeforeRegAlloc. -func (i *instruction) AddedBeforeRegAlloc() bool { - return i.addedBeforeRegAlloc -} - type defKind byte const ( diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go index f0ede2d6a..21be9b71e 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go @@ -12,7 +12,7 @@ import ( // Encode implements backend.Machine Encode. func (m *machine) Encode(ctx context.Context) error { m.resolveRelativeAddresses(ctx) - m.encode(m.executableContext.RootInstr) + m.encode(m.rootInstr) if l := len(m.compiler.Buf()); l > maxFunctionExecutableSize { return fmt.Errorf("function size exceeds the limit: %d > %d", l, maxFunctionExecutableSize) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index 048bf3204..f9df356c0 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -17,19 +17,18 @@ import ( // LowerSingleBranch implements backend.Machine. func (m *machine) LowerSingleBranch(br *ssa.Instruction) { - ectx := m.executableContext switch br.Opcode() { case ssa.OpcodeJump: - _, _, targetBlk := br.BranchData() + _, _, targetBlkID := br.BranchData() if br.IsFallthroughJump() { return } b := m.allocateInstr() - target := ectx.GetOrAllocateSSABlockLabel(targetBlk) - if target == labelReturn { + targetBlk := m.compiler.SSABuilder().BasicBlock(targetBlkID) + if targetBlk.ReturnBlock() { b.asRet() } else { - b.asBr(target) + b.asBr(ssaBlockLabel(targetBlk)) } m.insert(b) case ssa.OpcodeBrTable: @@ -40,7 +39,8 @@ func (m *machine) LowerSingleBranch(br *ssa.Instruction) { } func (m *machine) lowerBrTable(i *ssa.Instruction) { - index, targets := i.BrTableData() + index, targetBlockIDs := i.BrTableData() + targetBlockCount := len(targetBlockIDs.View()) indexOperand := m.getOperand_NR(m.compiler.ValueDefinition(index), extModeNone) // Firstly, we have to do the bounds check of the index, and @@ -50,7 +50,7 @@ func (m *machine) lowerBrTable(i *ssa.Instruction) { // subs wzr, index, maxIndexReg // csel adjustedIndex, maxIndexReg, index, hs ;; if index is higher or equal than maxIndexReg. maxIndexReg := m.compiler.AllocateVReg(ssa.TypeI32) - m.lowerConstantI32(maxIndexReg, int32(len(targets)-1)) + m.lowerConstantI32(maxIndexReg, int32(targetBlockCount-1)) subs := m.allocateInstr() subs.asALU(aluOpSubS, xzrVReg, indexOperand, operandNR(maxIndexReg), false) m.insert(subs) @@ -61,24 +61,24 @@ func (m *machine) lowerBrTable(i *ssa.Instruction) { brSequence := m.allocateInstr() - tableIndex := m.addJmpTableTarget(targets) - brSequence.asBrTableSequence(adjustedIndex, tableIndex, len(targets)) + tableIndex := m.addJmpTableTarget(targetBlockIDs) + brSequence.asBrTableSequence(adjustedIndex, tableIndex, targetBlockCount) m.insert(brSequence) } // LowerConditionalBranch implements backend.Machine. func (m *machine) LowerConditionalBranch(b *ssa.Instruction) { - exctx := m.executableContext - cval, args, targetBlk := b.BranchData() + cval, args, targetBlkID := b.BranchData() if len(args) > 0 { panic(fmt.Sprintf( "conditional branch shouldn't have args; likely a bug in critical edge splitting: from %s to %s", - exctx.CurrentSSABlk, - targetBlk, + m.currentLabelPos.sb, + targetBlkID, )) } - target := exctx.GetOrAllocateSSABlockLabel(targetBlk) + targetBlk := m.compiler.SSABuilder().BasicBlock(targetBlkID) + target := ssaBlockLabel(targetBlk) cvalDef := m.compiler.ValueDefinition(cval) switch { @@ -791,7 +791,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { default: panic("TODO: lowering " + op.String()) } - m.executableContext.FlushPendingInstructions() + m.FlushPendingInstructions() } func (m *machine) lowerShuffle(rd regalloc.VReg, rn, rm operand, lane1, lane2 uint64) { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go index d9fbf1789..7a398c3d0 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go @@ -162,9 +162,9 @@ func (o operand) assignReg(v regalloc.VReg) operand { // // `mode` is used to extend the operand if the bit length is smaller than mode.bits(). // If the operand can be expressed as operandKindImm12, `mode` is ignored. -func (m *machine) getOperand_Imm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { - if def.IsFromBlockParam() { - return operandNR(def.BlkParamVReg) +func (m *machine) getOperand_Imm12_ER_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { + if !def.IsFromInstr() { + return operandNR(m.compiler.VRegOf(def.V)) } instr := def.Instr @@ -179,9 +179,9 @@ func (m *machine) getOperand_Imm12_ER_SR_NR(def *backend.SSAValueDefinition, mod // getOperand_MaybeNegatedImm12_ER_SR_NR is almost the same as getOperand_Imm12_ER_SR_NR, but this might negate the immediate value. // If the immediate value is negated, the second return value is true, otherwise always false. -func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand, negatedImm12 bool) { - if def.IsFromBlockParam() { - return operandNR(def.BlkParamVReg), false +func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand, negatedImm12 bool) { + if !def.IsFromInstr() { + return operandNR(m.compiler.VRegOf(def.V)), false } instr := def.Instr @@ -193,7 +193,7 @@ func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDef } signExtended := int64(c) - if def.SSAValue().Type().Bits() == 32 { + if def.V.Type().Bits() == 32 { signExtended = (signExtended << 32) >> 32 } negatedWithoutSign := -signExtended @@ -208,9 +208,9 @@ func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDef // ensureValueNR returns an operand of either operandKindER, operandKindSR, or operandKindNR from the given value (defined by `def). // // `mode` is used to extend the operand if the bit length is smaller than mode.bits(). -func (m *machine) getOperand_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { - if def.IsFromBlockParam() { - return operandNR(def.BlkParamVReg) +func (m *machine) getOperand_ER_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { + if !def.IsFromInstr() { + return operandNR(m.compiler.VRegOf(def.V)) } if m.compiler.MatchInstr(def, ssa.OpcodeSExtend) || m.compiler.MatchInstr(def, ssa.OpcodeUExtend) { @@ -251,9 +251,9 @@ func (m *machine) getOperand_ER_SR_NR(def *backend.SSAValueDefinition, mode extM // ensureValueNR returns an operand of either operandKindSR or operandKindNR from the given value (defined by `def). // // `mode` is used to extend the operand if the bit length is smaller than mode.bits(). -func (m *machine) getOperand_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { - if def.IsFromBlockParam() { - return operandNR(def.BlkParamVReg) +func (m *machine) getOperand_SR_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { + if !def.IsFromInstr() { + return operandNR(m.compiler.VRegOf(def.V)) } if m.compiler.MatchInstr(def, ssa.OpcodeIshl) { @@ -273,9 +273,9 @@ func (m *machine) getOperand_SR_NR(def *backend.SSAValueDefinition, mode extMode } // getOperand_ShiftImm_NR returns an operand of either operandKindShiftImm or operandKindNR from the given value (defined by `def). -func (m *machine) getOperand_ShiftImm_NR(def *backend.SSAValueDefinition, mode extMode, shiftBitWidth byte) (op operand) { - if def.IsFromBlockParam() { - return operandNR(def.BlkParamVReg) +func (m *machine) getOperand_ShiftImm_NR(def backend.SSAValueDefinition, mode extMode, shiftBitWidth byte) (op operand) { + if !def.IsFromInstr() { + return operandNR(m.compiler.VRegOf(def.V)) } instr := def.Instr @@ -289,28 +289,18 @@ func (m *machine) getOperand_ShiftImm_NR(def *backend.SSAValueDefinition, mode e // ensureValueNR returns an operand of operandKindNR from the given value (defined by `def). // // `mode` is used to extend the operand if the bit length is smaller than mode.bits(). -func (m *machine) getOperand_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) { +func (m *machine) getOperand_NR(def backend.SSAValueDefinition, mode extMode) (op operand) { var v regalloc.VReg - if def.IsFromBlockParam() { - v = def.BlkParamVReg + if def.IsFromInstr() && def.Instr.Constant() { + // We inline all the constant instructions so that we could reduce the register usage. + v = m.lowerConstant(def.Instr) + def.Instr.MarkLowered() } else { - instr := def.Instr - if instr.Constant() { - // We inline all the constant instructions so that we could reduce the register usage. - v = m.lowerConstant(instr) - instr.MarkLowered() - } else { - if n := def.N; n == 0 { - v = m.compiler.VRegOf(instr.Return()) - } else { - _, rs := instr.Returns() - v = m.compiler.VRegOf(rs[n-1]) - } - } + v = m.compiler.VRegOf(def.V) } r := v - switch inBits := def.SSAValue().Type().Bits(); { + switch inBits := def.V.Type().Bits(); { case mode == extModeNone: case inBits == 32 && (mode == extModeZeroExtend32 || mode == extModeSignExtend32): case inBits == 32 && mode == extModeZeroExtend64: diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go index 5f584f928..00e6b238f 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go @@ -3,6 +3,7 @@ package arm64 import ( "context" "fmt" + "math" "strings" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" @@ -14,12 +15,33 @@ import ( type ( // machine implements backend.Machine. machine struct { - compiler backend.Compiler - executableContext *backend.ExecutableContextT[instruction] - currentABI *backend.FunctionABI - - regAlloc regalloc.Allocator - regAllocFn *backend.RegAllocFunction[*instruction, *machine] + compiler backend.Compiler + currentABI *backend.FunctionABI + instrPool wazevoapi.Pool[instruction] + // labelPositionPool is the pool of labelPosition. The id is the label where + // if the label is less than the maxSSABlockID, it's the ssa.BasicBlockID. + labelPositionPool wazevoapi.IDedPool[labelPosition] + + // nextLabel is the next label to be allocated. The first free label comes after maxSSABlockID + // so that we can have an identical label for the SSA block ID, which is useful for debugging. + nextLabel label + // rootInstr is the first instruction of the function. + rootInstr *instruction + // currentLabelPos is the currently-compiled ssa.BasicBlock's labelPosition. + currentLabelPos *labelPosition + // orderedSSABlockLabelPos is the ordered list of labelPosition in the generated code for each ssa.BasicBlock. + orderedSSABlockLabelPos []*labelPosition + // returnLabelPos is the labelPosition for the return block. + returnLabelPos labelPosition + // perBlockHead and perBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock. + perBlockHead, perBlockEnd *instruction + // pendingInstructions are the instructions which are not yet emitted into the instruction list. + pendingInstructions []*instruction + // maxSSABlockID is the maximum ssa.BasicBlockID in the current function. + maxSSABlockID label + + regAlloc regalloc.Allocator[*instruction, *labelPosition, *regAllocFn] + regAllocFn regAllocFn amodePool wazevoapi.Pool[addressMode] @@ -35,6 +57,8 @@ type ( // jmpTableTargets holds the labels of the jump table targets. jmpTableTargets [][]uint32 + // jmpTableTargetNext is the index to the jmpTableTargets slice to be used for the next jump table. + jmpTableTargetsNext int // spillSlotSize is the size of the stack slot in bytes used for spilling registers. // During the execution of the function, the stack looks like: @@ -91,45 +115,132 @@ type ( nextLabel label offset int64 } +) - labelPosition = backend.LabelPosition[instruction] - label = backend.Label +type ( + // label represents a position in the generated code which is either + // a real instruction or the constant InstructionPool (e.g. jump tables). + // + // This is exactly the same as the traditional "label" in assembly code. + label uint32 + + // labelPosition represents the regions of the generated code which the label represents. + // This implements regalloc.Block. + labelPosition struct { + // sb is not nil if this corresponds to a ssa.BasicBlock. + sb ssa.BasicBlock + // cur is used to walk through the instructions in the block during the register allocation. + cur, + // begin and end are the first and last instructions of the block. + begin, end *instruction + // binaryOffset is the offset in the binary where the label is located. + binaryOffset int64 + } ) const ( - labelReturn = backend.LabelReturn - labelInvalid = backend.LabelInvalid + labelReturn label = math.MaxUint32 + labelInvalid = labelReturn - 1 ) +// String implements backend.Machine. +func (l label) String() string { + return fmt.Sprintf("L%d", l) +} + +func resetLabelPosition(l *labelPosition) { + *l = labelPosition{} +} + // NewBackend returns a new backend for arm64. func NewBackend() backend.Machine { m := &machine{ spillSlots: make(map[regalloc.VRegID]int64), - executableContext: newExecutableContext(), - regAlloc: regalloc.NewAllocator(regInfo), + regAlloc: regalloc.NewAllocator[*instruction, *labelPosition, *regAllocFn](regInfo), amodePool: wazevoapi.NewPool[addressMode](resetAddressMode), + instrPool: wazevoapi.NewPool[instruction](resetInstruction), + labelPositionPool: wazevoapi.NewIDedPool[labelPosition](resetLabelPosition), } + m.regAllocFn.m = m return m } -func newExecutableContext() *backend.ExecutableContextT[instruction] { - return backend.NewExecutableContextT[instruction](resetInstruction, setNext, setPrev, asNop0) +func ssaBlockLabel(sb ssa.BasicBlock) label { + if sb.ReturnBlock() { + return labelReturn + } + return label(sb.ID()) } -// ExecutableContext implements backend.Machine. -func (m *machine) ExecutableContext() backend.ExecutableContext { - return m.executableContext +// getOrAllocateSSABlockLabelPosition returns the labelPosition for the given basic block. +func (m *machine) getOrAllocateSSABlockLabelPosition(sb ssa.BasicBlock) *labelPosition { + if sb.ReturnBlock() { + m.returnLabelPos.sb = sb + return &m.returnLabelPos + } + + l := ssaBlockLabel(sb) + pos := m.labelPositionPool.GetOrAllocate(int(l)) + pos.sb = sb + return pos } -// RegAlloc implements backend.Machine Function. -func (m *machine) RegAlloc() { - rf := m.regAllocFn - for _, pos := range m.executableContext.OrderedBlockLabels { - rf.AddBlock(pos.SB, pos.L, pos.Begin, pos.End) +// LinkAdjacentBlocks implements backend.Machine. +func (m *machine) LinkAdjacentBlocks(prev, next ssa.BasicBlock) { + prevPos, nextPos := m.getOrAllocateSSABlockLabelPosition(prev), m.getOrAllocateSSABlockLabelPosition(next) + prevPos.end.next = nextPos.begin +} + +// StartBlock implements backend.Machine. +func (m *machine) StartBlock(blk ssa.BasicBlock) { + m.currentLabelPos = m.getOrAllocateSSABlockLabelPosition(blk) + labelPos := m.currentLabelPos + end := m.allocateNop() + m.perBlockHead, m.perBlockEnd = end, end + labelPos.begin, labelPos.end = end, end + m.orderedSSABlockLabelPos = append(m.orderedSSABlockLabelPos, labelPos) +} + +// EndBlock implements ExecutableContext. +func (m *machine) EndBlock() { + // Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions. + m.insertAtPerBlockHead(m.allocateNop()) + + m.currentLabelPos.begin = m.perBlockHead + + if m.currentLabelPos.sb.EntryBlock() { + m.rootInstr = m.perBlockHead + } +} + +func (m *machine) insertAtPerBlockHead(i *instruction) { + if m.perBlockHead == nil { + m.perBlockHead = i + m.perBlockEnd = i + return } + i.next = m.perBlockHead + m.perBlockHead.prev = i + m.perBlockHead = i +} + +// FlushPendingInstructions implements backend.Machine. +func (m *machine) FlushPendingInstructions() { + l := len(m.pendingInstructions) + if l == 0 { + return + } + for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order. + m.insertAtPerBlockHead(m.pendingInstructions[i]) + } + m.pendingInstructions = m.pendingInstructions[:0] +} + +// RegAlloc implements backend.Machine Function. +func (m *machine) RegAlloc() { m.regAllocStarted = true - m.regAlloc.DoAllocation(rf) + m.regAlloc.DoAllocation(&m.regAllocFn) // Now that we know the final spill slot size, we must align spillSlotSize to 16 bytes. m.spillSlotSize = (m.spillSlotSize + 15) &^ 15 } @@ -146,13 +257,22 @@ func (m *machine) Reset() { m.clobberedRegs = m.clobberedRegs[:0] m.regAllocStarted = false m.regAlloc.Reset() - m.regAllocFn.Reset() m.spillSlotSize = 0 m.unresolvedAddressModes = m.unresolvedAddressModes[:0] m.maxRequiredStackSizeForCalls = 0 - m.executableContext.Reset() - m.jmpTableTargets = m.jmpTableTargets[:0] + m.jmpTableTargetsNext = 0 m.amodePool.Reset() + m.instrPool.Reset() + m.labelPositionPool.Reset() + m.pendingInstructions = m.pendingInstructions[:0] + m.perBlockHead, m.perBlockEnd, m.rootInstr = nil, nil, nil + m.orderedSSABlockLabelPos = m.orderedSSABlockLabelPos[:0] +} + +// StartLoweringFunction implements backend.Machine StartLoweringFunction. +func (m *machine) StartLoweringFunction(maxBlockID ssa.BasicBlockID) { + m.maxSSABlockID = label(maxBlockID) + m.nextLabel = label(maxBlockID) + 1 } // SetCurrentABI implements backend.Machine SetCurrentABI. @@ -168,12 +288,11 @@ func (m *machine) DisableStackCheck() { // SetCompiler implements backend.Machine. func (m *machine) SetCompiler(ctx backend.Compiler) { m.compiler = ctx - m.regAllocFn = backend.NewRegAllocFunction[*instruction, *machine](m, ctx.SSABuilder(), ctx) + m.regAllocFn.ssaB = ctx.SSABuilder() } func (m *machine) insert(i *instruction) { - ectx := m.executableContext - ectx.PendingInstructions = append(ectx.PendingInstructions, i) + m.pendingInstructions = append(m.pendingInstructions, i) } func (m *machine) insertBrTargetLabel() label { @@ -183,18 +302,18 @@ func (m *machine) insertBrTargetLabel() label { } func (m *machine) allocateBrTarget() (nop *instruction, l label) { - ectx := m.executableContext - l = ectx.AllocateLabel() + l = m.nextLabel + m.nextLabel++ nop = m.allocateInstr() nop.asNop0WithLabel(l) - pos := ectx.GetOrAllocateLabelPosition(l) - pos.Begin, pos.End = nop, nop + pos := m.labelPositionPool.GetOrAllocate(int(l)) + pos.begin, pos.end = nop, nop return } // allocateInstr allocates an instruction. func (m *machine) allocateInstr() *instruction { - instr := m.executableContext.InstructionPool.Allocate() + instr := m.instrPool.Allocate() if !m.regAllocStarted { instr.addedBeforeRegAlloc = true } @@ -251,7 +370,6 @@ func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruc // resolveRelativeAddresses resolves the relative addresses before encoding. func (m *machine) resolveRelativeAddresses(ctx context.Context) { - ectx := m.executableContext for { if len(m.unresolvedAddressModes) > 0 { arg0offset, ret0offset := m.arg0OffsetFromSP(), m.ret0OffsetFromSP() @@ -265,35 +383,36 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) { var fn string var fnIndex int - var labelToSSABlockID map[label]ssa.BasicBlockID + var labelPosToLabel map[*labelPosition]label if wazevoapi.PerfMapEnabled { - fn = wazevoapi.GetCurrentFunctionName(ctx) - labelToSSABlockID = make(map[label]ssa.BasicBlockID) - for i, l := range ectx.SsaBlockIDToLabels { - labelToSSABlockID[l] = ssa.BasicBlockID(i) + labelPosToLabel = make(map[*labelPosition]label) + for i := 0; i <= m.labelPositionPool.MaxIDEncountered(); i++ { + labelPosToLabel[m.labelPositionPool.Get(i)] = label(i) } + + fn = wazevoapi.GetCurrentFunctionName(ctx) fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx) } // Next, in order to determine the offsets of relative jumps, we have to calculate the size of each label. var offset int64 - for i, pos := range ectx.OrderedBlockLabels { - pos.BinaryOffset = offset + for i, pos := range m.orderedSSABlockLabelPos { + pos.binaryOffset = offset var size int64 - for cur := pos.Begin; ; cur = cur.next { + for cur := pos.begin; ; cur = cur.next { switch cur.kind { case nop0: l := cur.nop0Label() - if pos := ectx.LabelPositions[l]; pos != nil { - pos.BinaryOffset = offset + size + if pos := m.labelPositionPool.Get(int(l)); pos != nil { + pos.binaryOffset = offset + size } case condBr: if !cur.condBrOffsetResolved() { var nextLabel label - if i < len(ectx.OrderedBlockLabels)-1 { + if i < len(m.orderedSSABlockLabelPos)-1 { // Note: this is only used when the block ends with fallthrough, // therefore can be safely assumed that the next block exists when it's needed. - nextLabel = ectx.OrderedBlockLabels[i+1].L + nextLabel = ssaBlockLabel(m.orderedSSABlockLabelPos[i+1].sb) } m.condBrRelocs = append(m.condBrRelocs, condBrReloc{ cbr: cur, currentLabelPos: pos, offset: offset + size, @@ -302,21 +421,14 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) { } } size += cur.size() - if cur == pos.End { + if cur == pos.end { break } } if wazevoapi.PerfMapEnabled { if size > 0 { - l := pos.L - var labelStr string - if blkID, ok := labelToSSABlockID[l]; ok { - labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID) - } else { - labelStr = l.String() - } - wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr)) + wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelPosToLabel[pos])) } } offset += size @@ -330,7 +442,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) { offset := reloc.offset target := cbr.condBrLabel() - offsetOfTarget := ectx.LabelPositions[target].BinaryOffset + offsetOfTarget := m.labelPositionPool.Get(int(target)).binaryOffset diff := offsetOfTarget - offset if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 { // This case the conditional branch is too huge. We place the trampoline instructions at the end of the current block, @@ -351,11 +463,11 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) { } var currentOffset int64 - for cur := ectx.RootInstr; cur != nil; cur = cur.next { + for cur := m.rootInstr; cur != nil; cur = cur.next { switch cur.kind { case br: target := cur.brLabel() - offsetOfTarget := ectx.LabelPositions[target].BinaryOffset + offsetOfTarget := m.labelPositionPool.Get(int(target)).binaryOffset diff := offsetOfTarget - currentOffset divided := diff >> 2 if divided < minSignedInt26 || divided > maxSignedInt26 { @@ -366,7 +478,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) { case condBr: if !cur.condBrOffsetResolved() { target := cur.condBrLabel() - offsetOfTarget := ectx.LabelPositions[target].BinaryOffset + offsetOfTarget := m.labelPositionPool.Get(int(target)).binaryOffset diff := offsetOfTarget - currentOffset if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 { panic("BUG: branch relocation for large conditional branch larger than 19-bit range must be handled properly") @@ -378,7 +490,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) { targets := m.jmpTableTargets[tableIndex] for i := range targets { l := label(targets[i]) - offsetOfTarget := ectx.LabelPositions[l].BinaryOffset + offsetOfTarget := m.labelPositionPool.Get(int(l)).binaryOffset diff := offsetOfTarget - (currentOffset + brTableSequenceOffsetTableBegin) targets[i] = uint32(diff) } @@ -399,7 +511,7 @@ const ( ) func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *labelPosition, nextLabel label) { - cur := currentBlk.End + cur := currentBlk.end originalTarget := cbr.condBrLabel() endNext := cur.next @@ -422,32 +534,27 @@ func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk * cur = linkInstr(cur, br) // Update the end of the current block. - currentBlk.End = cur + currentBlk.end = cur linkInstr(cur, endNext) } // Format implements backend.Machine. func (m *machine) Format() string { - ectx := m.executableContext begins := map[*instruction]label{} - for _, pos := range ectx.LabelPositions { + for l := label(0); l < m.nextLabel; l++ { + pos := m.labelPositionPool.Get(int(l)) if pos != nil { - begins[pos.Begin] = pos.L + begins[pos.begin] = l } } - irBlocks := map[label]ssa.BasicBlockID{} - for i, l := range ectx.SsaBlockIDToLabels { - irBlocks[l] = ssa.BasicBlockID(i) - } - var lines []string - for cur := ectx.RootInstr; cur != nil; cur = cur.next { + for cur := m.rootInstr; cur != nil; cur = cur.next { if l, ok := begins[cur]; ok { var labelStr string - if blkID, ok := irBlocks[l]; ok { - labelStr = fmt.Sprintf("%s (SSA Block: %s):", l, blkID) + if l <= m.maxSSABlockID { + labelStr = fmt.Sprintf("%s (SSA Block: blk%d):", l, int(l)) } else { labelStr = fmt.Sprintf("%s:", l) } @@ -508,13 +615,17 @@ func (m *machine) frameSize() int64 { return s } -func (m *machine) addJmpTableTarget(targets []ssa.BasicBlock) (index int) { - // TODO: reuse the slice! - labels := make([]uint32, len(targets)) - for j, target := range targets { - labels[j] = uint32(m.executableContext.GetOrAllocateSSABlockLabel(target)) +func (m *machine) addJmpTableTarget(targets ssa.Values) (index int) { + if m.jmpTableTargetsNext == len(m.jmpTableTargets) { + m.jmpTableTargets = append(m.jmpTableTargets, make([]uint32, 0, len(targets.View()))) + } + + index = m.jmpTableTargetsNext + m.jmpTableTargetsNext++ + m.jmpTableTargets[index] = m.jmpTableTargets[index][:0] + for _, targetBlockID := range targets.View() { + target := m.compiler.SSABuilder().BasicBlock(ssa.BasicBlockID(targetBlockID)) + m.jmpTableTargets[index] = append(m.jmpTableTargets[index], uint32(target.ID())) } - index = len(m.jmpTableTargets) - m.jmpTableTargets = append(m.jmpTableTargets, labels) return } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go index d9032f921..c646a8fab 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go @@ -15,9 +15,7 @@ func (m *machine) PostRegAlloc() { // setupPrologue initializes the prologue of the function. func (m *machine) setupPrologue() { - ectx := m.executableContext - - cur := ectx.RootInstr + cur := m.rootInstr prevInitInst := cur.next // @@ -196,21 +194,20 @@ func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction { // 1. Removes the redundant copy instruction. // 2. Inserts the epilogue. func (m *machine) postRegAlloc() { - ectx := m.executableContext - for cur := ectx.RootInstr; cur != nil; cur = cur.next { + for cur := m.rootInstr; cur != nil; cur = cur.next { switch cur.kind { case ret: m.setupEpilogueAfter(cur.prev) case loadConstBlockArg: lc := cur next := lc.next - m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] m.lowerLoadConstantBlockArgAfterRegAlloc(lc) - for _, instr := range m.executableContext.PendingInstructions { + for _, instr := range m.pendingInstructions { cur = linkInstr(cur, instr) } linkInstr(cur, next) - m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] default: // Removes the redundant copy instruction. if cur.IsCopy() && cur.rn.realReg() == cur.rd.RealReg() { @@ -432,11 +429,9 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi // CompileStackGrowCallSequence implements backend.Machine. func (m *machine) CompileStackGrowCallSequence() []byte { - ectx := m.executableContext - cur := m.allocateInstr() cur.asNop0() - ectx.RootInstr = cur + m.rootInstr = cur // Save the callee saved and argument registers. cur = m.saveRegistersInExecutionContext(cur, saveRequiredRegs) @@ -458,16 +453,14 @@ func (m *machine) CompileStackGrowCallSequence() []byte { ret.asRet() linkInstr(cur, ret) - m.encode(ectx.RootInstr) + m.encode(m.rootInstr) return m.compiler.Buf() } func (m *machine) addsAddOrSubStackPointer(cur *instruction, rd regalloc.VReg, diff int64, add bool) *instruction { - ectx := m.executableContext - - ectx.PendingInstructions = ectx.PendingInstructions[:0] + m.pendingInstructions = m.pendingInstructions[:0] m.insertAddOrSubStackPointer(rd, diff, add) - for _, inserted := range ectx.PendingInstructions { + for _, inserted := range m.pendingInstructions { cur = linkInstr(cur, inserted) } return cur diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go index c7eb92cc2..f2ed53ae5 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go @@ -3,18 +3,226 @@ package arm64 // This file implements the interfaces required for register allocations. See backend.RegAllocFunctionMachine. import ( - "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" ) -// ClobberedRegisters implements backend.RegAllocFunctionMachine. -func (m *machine) ClobberedRegisters(regs []regalloc.VReg) { - m.clobberedRegs = append(m.clobberedRegs[:0], regs...) +// regAllocFn implements regalloc.Function. +type regAllocFn struct { + ssaB ssa.Builder + m *machine + loopNestingForestRoots []ssa.BasicBlock + blockIter int } -// Swap implements backend.RegAllocFunctionMachine. -func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) { +// PostOrderBlockIteratorBegin implements regalloc.Function. +func (f *regAllocFn) PostOrderBlockIteratorBegin() *labelPosition { + f.blockIter = len(f.m.orderedSSABlockLabelPos) - 1 + return f.PostOrderBlockIteratorNext() +} + +// PostOrderBlockIteratorNext implements regalloc.Function. +func (f *regAllocFn) PostOrderBlockIteratorNext() *labelPosition { + if f.blockIter < 0 { + return nil + } + b := f.m.orderedSSABlockLabelPos[f.blockIter] + f.blockIter-- + return b +} + +// ReversePostOrderBlockIteratorBegin implements regalloc.Function. +func (f *regAllocFn) ReversePostOrderBlockIteratorBegin() *labelPosition { + f.blockIter = 0 + return f.ReversePostOrderBlockIteratorNext() +} + +// ReversePostOrderBlockIteratorNext implements regalloc.Function. +func (f *regAllocFn) ReversePostOrderBlockIteratorNext() *labelPosition { + if f.blockIter >= len(f.m.orderedSSABlockLabelPos) { + return nil + } + b := f.m.orderedSSABlockLabelPos[f.blockIter] + f.blockIter++ + return b +} + +// ClobberedRegisters implements regalloc.Function. +func (f *regAllocFn) ClobberedRegisters(regs []regalloc.VReg) { + f.m.clobberedRegs = append(f.m.clobberedRegs[:0], regs...) +} + +// LoopNestingForestRoots implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestRoots() int { + f.loopNestingForestRoots = f.ssaB.LoopNestingForestRoots() + return len(f.loopNestingForestRoots) +} + +// LoopNestingForestRoot implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestRoot(i int) *labelPosition { + root := f.loopNestingForestRoots[i] + pos := f.m.getOrAllocateSSABlockLabelPosition(root) + return pos +} + +// LowestCommonAncestor implements regalloc.Function. +func (f *regAllocFn) LowestCommonAncestor(blk1, blk2 *labelPosition) *labelPosition { + sb := f.ssaB.LowestCommonAncestor(blk1.sb, blk2.sb) + pos := f.m.getOrAllocateSSABlockLabelPosition(sb) + return pos +} + +// Idom implements regalloc.Function. +func (f *regAllocFn) Idom(blk *labelPosition) *labelPosition { + sb := f.ssaB.Idom(blk.sb) + pos := f.m.getOrAllocateSSABlockLabelPosition(sb) + return pos +} + +// SwapBefore implements regalloc.Function. +func (f *regAllocFn) SwapBefore(x1, x2, tmp regalloc.VReg, instr *instruction) { + f.m.swap(instr.prev, x1, x2, tmp) +} + +// StoreRegisterBefore implements regalloc.Function. +func (f *regAllocFn) StoreRegisterBefore(v regalloc.VReg, instr *instruction) { + m := f.m + m.insertStoreRegisterAt(v, instr, false) +} + +// StoreRegisterAfter implements regalloc.Function. +func (f *regAllocFn) StoreRegisterAfter(v regalloc.VReg, instr *instruction) { + m := f.m + m.insertStoreRegisterAt(v, instr, true) +} + +// ReloadRegisterBefore implements regalloc.Function. +func (f *regAllocFn) ReloadRegisterBefore(v regalloc.VReg, instr *instruction) { + m := f.m + m.insertReloadRegisterAt(v, instr, false) +} + +// ReloadRegisterAfter implements regalloc.Function. +func (f *regAllocFn) ReloadRegisterAfter(v regalloc.VReg, instr *instruction) { + m := f.m + m.insertReloadRegisterAt(v, instr, true) +} + +// InsertMoveBefore implements regalloc.Function. +func (f *regAllocFn) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { + f.m.insertMoveBefore(dst, src, instr) +} + +// LoopNestingForestChild implements regalloc.Function. +func (f *regAllocFn) LoopNestingForestChild(pos *labelPosition, i int) *labelPosition { + childSB := pos.sb.LoopNestingForestChildren()[i] + return f.m.getOrAllocateSSABlockLabelPosition(childSB) +} + +// Succ implements regalloc.Block. +func (f *regAllocFn) Succ(pos *labelPosition, i int) *labelPosition { + succSB := pos.sb.Succ(i) + if succSB.ReturnBlock() { + return nil + } + return f.m.getOrAllocateSSABlockLabelPosition(succSB) +} + +// Pred implements regalloc.Block. +func (f *regAllocFn) Pred(pos *labelPosition, i int) *labelPosition { + predSB := pos.sb.Pred(i) + return f.m.getOrAllocateSSABlockLabelPosition(predSB) +} + +// BlockParams implements regalloc.Function. +func (f *regAllocFn) BlockParams(pos *labelPosition, regs *[]regalloc.VReg) []regalloc.VReg { + c := f.m.compiler + *regs = (*regs)[:0] + for i := 0; i < pos.sb.Params(); i++ { + v := c.VRegOf(pos.sb.Param(i)) + *regs = append(*regs, v) + } + return *regs +} + +// ID implements regalloc.Block. +func (pos *labelPosition) ID() int32 { + return int32(pos.sb.ID()) +} + +// InstrIteratorBegin implements regalloc.Block. +func (pos *labelPosition) InstrIteratorBegin() *instruction { + ret := pos.begin + pos.cur = ret + return ret +} + +// InstrIteratorNext implements regalloc.Block. +func (pos *labelPosition) InstrIteratorNext() *instruction { + for { + if pos.cur == pos.end { + return nil + } + instr := pos.cur.next + pos.cur = instr + if instr == nil { + return nil + } else if instr.addedBeforeRegAlloc { + // Only concerned about the instruction added before regalloc. + return instr + } + } +} + +// InstrRevIteratorBegin implements regalloc.Block. +func (pos *labelPosition) InstrRevIteratorBegin() *instruction { + pos.cur = pos.end + return pos.cur +} + +// InstrRevIteratorNext implements regalloc.Block. +func (pos *labelPosition) InstrRevIteratorNext() *instruction { + for { + if pos.cur == pos.begin { + return nil + } + instr := pos.cur.prev + pos.cur = instr + if instr == nil { + return nil + } else if instr.addedBeforeRegAlloc { + // Only concerned about the instruction added before regalloc. + return instr + } + } +} + +// FirstInstr implements regalloc.Block. +func (pos *labelPosition) FirstInstr() *instruction { return pos.begin } + +// LastInstrForInsertion implements regalloc.Block. +func (pos *labelPosition) LastInstrForInsertion() *instruction { + return lastInstrForInsertion(pos.begin, pos.end) +} + +// Preds implements regalloc.Block. +func (pos *labelPosition) Preds() int { return pos.sb.Preds() } + +// Entry implements regalloc.Block. +func (pos *labelPosition) Entry() bool { return pos.sb.EntryBlock() } + +// Succs implements regalloc.Block. +func (pos *labelPosition) Succs() int { return pos.sb.Succs() } + +// LoopHeader implements regalloc.Block. +func (pos *labelPosition) LoopHeader() bool { return pos.sb.LoopHeader() } + +// LoopNestingForestChildren implements regalloc.Block. +func (pos *labelPosition) LoopNestingForestChildren() int { + return len(pos.sb.LoopNestingForestChildren()) +} + +func (m *machine) swap(cur *instruction, x1, x2, tmp regalloc.VReg) { prevNext := cur.next var mov1, mov2, mov3 *instruction if x1.RegType() == regalloc.RegTypeInt { @@ -32,12 +240,12 @@ func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) { if !tmp.Valid() { r2 := x2.RealReg() // Temporarily spill x1 to stack. - cur = m.InsertStoreRegisterAt(x1, cur, true).prev + cur = m.insertStoreRegisterAt(x1, cur, true).prev // Then move x2 to x1. cur = linkInstr(cur, m.allocateInstr().asFpuMov128(x1, x2)) linkInstr(cur, prevNext) // Then reload the original value on x1 from stack to r2. - m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true) + m.insertReloadRegisterAt(x1.SetRealReg(r2), cur, true) } else { mov1 = m.allocateInstr().asFpuMov128(tmp, x1) mov2 = m.allocateInstr().asFpuMov128(x1, x2) @@ -50,8 +258,7 @@ func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) { } } -// InsertMoveBefore implements backend.RegAllocFunctionMachine. -func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { +func (m *machine) insertMoveBefore(dst, src regalloc.VReg, instr *instruction) { typ := src.RegType() if typ != dst.RegType() { panic("BUG: src and dst must have the same type") @@ -70,13 +277,7 @@ func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) { linkInstr(cur, prevNext) } -// SSABlockLabel implements backend.RegAllocFunctionMachine. -func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label { - return m.executableContext.SsaBlockIDToLabels[id] -} - -// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine. -func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { +func (m *machine) insertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { if !v.IsRealReg() { panic("BUG: VReg must be backed by real reg to be stored") } @@ -100,8 +301,7 @@ func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, aft return linkInstr(cur, prevNext) } -// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine. -func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { +func (m *machine) insertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction { if !v.IsRealReg() { panic("BUG: VReg must be backed by real reg to be stored") } @@ -134,8 +334,7 @@ func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, af return linkInstr(cur, prevNext) } -// LastInstrForInsertion implements backend.RegAllocFunctionMachine. -func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction { +func lastInstrForInsertion(begin, end *instruction) *instruction { cur := end for cur.kind == nop0 { cur = cur.prev diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go index edb0e36e3..a72b86f6b 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go @@ -14,7 +14,7 @@ func UnwindStack(sp, _, top uintptr, returnAddresses []uintptr) []uintptr { var stackBuf []byte { - // TODO: use unsafe.Slice after floor version is set to Go 1.20. + //nolint:staticcheck hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf)) hdr.Data = sp hdr.Len = l @@ -78,13 +78,7 @@ func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 { // +-----------------+ <---- stackPointerBeforeGoCall // (low address) ptr := unsafe.Pointer(stackPointerBeforeGoCall) + data := (*uint64)(unsafe.Add(ptr, 16)) // skips the (frame_size, sliceSize). size := *(*uint64)(unsafe.Add(ptr, 8)) - var view []uint64 - { - sh := (*reflect.SliceHeader)(unsafe.Pointer(&view)) - sh.Data = uintptr(unsafe.Add(ptr, 16)) // skips the (frame_size, sliceSize). - sh.Len = int(size) - sh.Cap = int(size) - } - return view + return unsafe.Slice(data, size) } |