Diffstat (limited to 'vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend')
11 files changed, 315 insertions, 48 deletions
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go
index 62d365015..8e3f08efc 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go
@@ -88,7 +88,7 @@ type Compiler interface {
 	MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode
 
 	// AddRelocationInfo appends the relocation information for the function reference at the current buffer offset.
-	AddRelocationInfo(funcRef ssa.FuncRef)
+	AddRelocationInfo(funcRef ssa.FuncRef, isTailCall bool)
 
 	// AddSourceOffsetInfo appends the source offset information for the given offset.
 	AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset)
@@ -115,6 +115,8 @@ type RelocationInfo struct {
 	Offset int64
 	// Target is the target function of the call instruction.
 	FuncRef ssa.FuncRef
+	// IsTailCall indicates whether the call instruction is a tail call.
+	IsTailCall bool
 }
 
 // compiler implements Compiler.
@@ -352,10 +354,11 @@ func (c *compiler) SourceOffsetInfo() []SourceOffsetInfo {
 }
 
 // AddRelocationInfo implements Compiler.AddRelocationInfo.
-func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef) {
+func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef, isTailCall bool) {
 	c.relocations = append(c.relocations, RelocationInfo{
-		Offset:  int64(len(c.buf)),
-		FuncRef: funcRef,
+		Offset:     int64(len(c.buf)),
+		FuncRef:    funcRef,
+		IsTailCall: isTailCall,
 	})
 }
 
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go
index 6a3e58f51..901c87aaf 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go
@@ -21,7 +21,9 @@ type instruction struct {
 func (i *instruction) IsCall() bool { return i.kind == call }
 
 // IsIndirectCall implements regalloc.Instr.
-func (i *instruction) IsIndirectCall() bool { return i.kind == callIndirect }
+func (i *instruction) IsIndirectCall() bool {
+	return i.kind == callIndirect
+}
 
 // IsReturn implements regalloc.Instr.
 func (i *instruction) IsReturn() bool { return i.kind == ret }
@@ -288,6 +290,11 @@ func (i *instruction) String() string {
 	case nopUseReg:
 		return fmt.Sprintf("nop_use_reg %s", i.op1.format(true))
 
+	case tailCall:
+		return fmt.Sprintf("tailCall %s", ssa.FuncRef(i.u1))
+	case tailCallIndirect:
+		return fmt.Sprintf("tailCallIndirect %s", i.op1.format(true))
+
 	default:
 		panic(fmt.Sprintf("BUG: %d", int(i.kind)))
 	}
@@ -357,7 +364,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
 		default:
 			panic(fmt.Sprintf("BUG: invalid operand: %s", i))
 		}
-	case useKindCallInd:
+	case useKindCallInd, useKindTailCallInd:
 		op := i.op1
 		switch op.kind {
 		case operandKindReg:
@@ -428,13 +435,16 @@ func (i *instruction) AssignUse(index int, v regalloc.VReg) {
 	switch uk := useKinds[i.kind]; uk {
 	case useKindNone:
-	case useKindCallInd:
+	case useKindCallInd, useKindTailCallInd:
 		if index != 0 {
 			panic("BUG")
 		}
 		op := &i.op1
 		switch op.kind {
 		case operandKindReg:
+			if uk == useKindTailCallInd && v != r11VReg {
+				panic("BUG")
+			}
 			op.setReg(v)
 		case operandKindMem:
 			op.addressMode().assignUses(index, v)
@@ -838,6 +848,12 @@ const (
 	// nopUseReg is a meta instruction that uses one register and does nothing.
 	nopUseReg
 
+	// tailCall is a meta instruction that emits a tail call.
+	tailCall
+
+	// tailCallIndirect is a meta instruction that emits a tail call with an indirect call.
+	tailCallIndirect
+
 	instrMax
 )
@@ -1079,6 +1095,10 @@ func (k instructionKind) String() string {
 		return "lockcmpxchg"
 	case lockxadd:
 		return "lockxadd"
+	case tailCall:
+		return "tailCall"
+	case tailCallIndirect:
+		return "tailCallIndirect"
 	default:
 		panic("BUG")
 	}
@@ -1173,6 +1193,27 @@ func (i *instruction) asCallIndirect(ptr operand, abi *backend.FunctionABI) *instruction {
 	return i
 }
 
+func (i *instruction) asTailCallReturnCall(ref ssa.FuncRef, abi *backend.FunctionABI) *instruction {
+	i.kind = tailCall
+	i.u1 = uint64(ref)
+	if abi != nil {
+		i.u2 = abi.ABIInfoAsUint64()
+	}
+	return i
+}
+
+func (i *instruction) asTailCallReturnCallIndirect(ptr operand, abi *backend.FunctionABI) *instruction {
+	if ptr.kind != operandKindReg && ptr.kind != operandKindMem {
+		panic("BUG")
+	}
+	i.kind = tailCallIndirect
+	i.op1 = ptr
+	if abi != nil {
+		i.u2 = abi.ABIInfoAsUint64()
+	}
+	return i
+}
+
 func (i *instruction) asRet() *instruction {
 	i.kind = ret
 	return i
@@ -2342,6 +2383,8 @@ var defKinds = [instrMax]defKind{
 	lockxadd:         defKindNone,
 	neg:              defKindNone,
 	nopUseReg:        defKindNone,
+	tailCall:         defKindCall,
+	tailCallIndirect: defKindCall,
 }
 
 // String implements fmt.Stringer.
@@ -2375,6 +2418,7 @@ const (
 	useKindBlendvpd
 	useKindCall
 	useKindCallInd
+	useKindTailCallInd
 	useKindFcvtToSintSequence
 	useKindFcvtToUintSequence
 )
@@ -2425,6 +2469,8 @@ var useKinds = [instrMax]useKind{
 	lockxadd:         useKindOp1RegOp2,
 	neg:              useKindOp1,
 	nopUseReg:        useKindOp1,
+	tailCall:         useKindCall,
+	tailCallIndirect: useKindTailCallInd,
 }
 
 func (u useKind) String() string {
@@ -2441,6 +2487,8 @@ func (u useKind) String() string {
 		return "call"
 	case useKindCallInd:
 		return "callInd"
+	case useKindTailCallInd:
+		return "tailCallInd"
 	default:
 		return "invalid"
 	}
 }
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go
index 6637b428c..d1eefbdb5 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go
@@ -1211,7 +1211,7 @@ func (i *instruction) encode(c backend.Compiler) (needsLabelResolution bool) {
 	case call:
 		c.EmitByte(0xe8)
 		// Meaning that the call target is a function value, and requires relocation.
-		c.AddRelocationInfo(ssa.FuncRef(i.u1))
+		c.AddRelocationInfo(ssa.FuncRef(i.u1), false)
 		// Note that this is zero as a placeholder for the call target if it's a function value.
 		c.Emit4Bytes(uint32(i.u2))
 
@@ -1244,6 +1244,37 @@ func (i *instruction) encode(c backend.Compiler) (needsLabelResolution bool) {
 			panic("BUG: invalid operand kind")
 		}
 
+	case tailCall:
+		// Encode as jmp.
+		c.EmitByte(0xe9)
+		// Meaning that the call target is a function value, and requires relocation.
+		c.AddRelocationInfo(ssa.FuncRef(i.u1), true)
+		// Note that this is zero as a placeholder for the call target if it's a function value.
+		c.Emit4Bytes(uint32(i.u2))
+
+	case tailCallIndirect:
+		op := i.op1
+
+		const opcodeNum = 1
+		const opcode = 0xff
+		const regMemSubOpcode = 4
+		rex := rexInfo(0).clearW()
+		switch op.kind {
+		// Indirect tail calls always take a register as the target.
+		// Note: the register should be a caller-saved register (usually r11).
+		case operandKindReg:
+			dst := regEncodings[op.reg().RealReg()]
+			encodeRegReg(c,
+				legacyPrefixesNone,
+				opcode, opcodeNum,
+				regMemSubOpcode,
+				dst,
+				rex,
+			)
+		default:
+			panic("BUG: invalid operand kind")
+		}
+
 	case xchg:
 		src, dst := regEncodings[i.op1.reg().RealReg()], i.op2
 		size := i.u1
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go
index fd0d69ca9..57d9bb731 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go
@@ -17,7 +17,7 @@ import (
 // NewBackend returns a new backend for arm64.
 func NewBackend() backend.Machine {
 	m := &machine{
-		cpuFeatures: platform.CpuFeatures,
+		cpuFeatures: platform.CpuFeatures(),
 		regAlloc:    regalloc.NewAllocator[*instruction, *labelPosition, *regAllocFn](regInfo),
 		spillSlots:  map[regalloc.VRegID]int64{},
 		amodePool:   wazevoapi.NewPool[amode](nil),
@@ -1109,6 +1109,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		atomicOp, size := instr.AtomicRmwData()
 		m.lowerAtomicRmw(atomicOp, addr, val, size, instr.Return())
 
+	case ssa.OpcodeTailCallReturnCall, ssa.OpcodeTailCallReturnCallIndirect:
+		m.lowerTailCall(instr)
+
 	default:
 		panic("TODO: lowering " + op.String())
 	}
@@ -1885,31 +1888,7 @@ func (m *machine) lowerStore(si *ssa.Instruction) {
 
 func (m *machine) lowerCall(si *ssa.Instruction) {
 	isDirectCall := si.Opcode() == ssa.OpcodeCall
-	var indirectCalleePtr ssa.Value
-	var directCallee ssa.FuncRef
-	var sigID ssa.SignatureID
-	var args []ssa.Value
-	var isMemmove bool
-	if isDirectCall {
-		directCallee, sigID, args = si.CallData()
-	} else {
-		indirectCalleePtr, sigID, args, isMemmove = si.CallIndirectData()
-	}
-	calleeABI := m.c.GetFunctionABI(m.c.SSABuilder().ResolveSignature(sigID))
-
-	stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize())
-	if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
-		m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // 16 == return address + RBP.
-	}
-
-	// Note: See machine.SetupPrologue for the stack layout.
-	// The stack pointer decrease/increase will be inserted later in the compilation.
-
-	for i, arg := range args {
-		reg := m.c.VRegOf(arg)
-		def := m.c.ValueDefinition(arg)
-		m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
-	}
+	indirectCalleePtr, directCallee, isMemmove, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall)
 
 	if isMemmove {
 		// Go's memmove *might* use all xmm0-xmm15, so we need to release them.
@@ -1939,6 +1918,39 @@ func (m *machine) lowerCall(si *ssa.Instruction) {
 		m.insert(m.allocateInstr().asNopUseReg(regInfo.RealRegToVReg[rdx]))
 	}
 
+	m.insertReturns(si, calleeABI, stackSlotSize)
+}
+
+func (m *machine) prepareCall(si *ssa.Instruction, isDirectCall bool) (ssa.Value, ssa.FuncRef, bool, *backend.FunctionABI, int64) {
+	var indirectCalleePtr ssa.Value
+	var directCallee ssa.FuncRef
+	var sigID ssa.SignatureID
+	var args []ssa.Value
+	var isMemmove bool
+	if isDirectCall {
+		directCallee, sigID, args = si.CallData()
+	} else {
+		indirectCalleePtr, sigID, args, isMemmove = si.CallIndirectData()
+	}
+	calleeABI := m.c.GetFunctionABI(m.c.SSABuilder().ResolveSignature(sigID))
+
+	stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize())
+	if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
+		m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // 16 == return address + RBP.
+	}
+
+	// Note: See machine.SetupPrologue for the stack layout.
+	// The stack pointer decrease/increase will be inserted later in the compilation.
+
+	for i, arg := range args {
+		reg := m.c.VRegOf(arg)
+		def := m.c.ValueDefinition(arg)
+		m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
+	}
+	return indirectCalleePtr, directCallee, isMemmove, calleeABI, stackSlotSize
+}
+
+func (m *machine) insertReturns(si *ssa.Instruction, calleeABI *backend.FunctionABI, stackSlotSize int64) {
 	var index int
 	r1, rs := si.Returns()
 	if r1.Valid() {
@@ -1952,6 +1964,43 @@
 	}
 }
 
+func (m *machine) lowerTailCall(si *ssa.Instruction) {
+	isDirectCall := si.Opcode() == ssa.OpcodeTailCallReturnCall
+	indirectCalleePtr, directCallee, isMemmove, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall)
+	if isMemmove {
+		panic("memmove not supported in tail calls")
+	}
+
+	isAllRegs := stackSlotSize == 0
+
+	switch {
+	case isDirectCall && isAllRegs:
+		call := m.allocateInstr().asTailCallReturnCall(directCallee, calleeABI)
+		m.insert(call)
+	case !isDirectCall && isAllRegs:
+		// In a tail call we insert the epilogue before the jump instruction,
+		// so an arbitrary register might be overwritten while restoring the stack.
+		// So, as compared to a regular indirect call, we ensure the pointer is stored
+		// in a caller-saved register (r11).
+		// For details, see internal/engine/RATIONALE.md
+		ptrOp := m.getOperand_Reg(m.c.ValueDefinition(indirectCalleePtr))
+		tmpJmp := r11VReg
+		m.InsertMove(tmpJmp, ptrOp.reg(), ssa.TypeI64)
+		callInd := m.allocateInstr().asTailCallReturnCallIndirect(newOperandReg(tmpJmp), calleeABI)
+		m.insert(callInd)
+	case isDirectCall && !isAllRegs:
+		call := m.allocateInstr().asCall(directCallee, calleeABI)
+		m.insert(call)
+	case !isDirectCall && !isAllRegs:
+		ptrOp := m.getOperand_Mem_Reg(m.c.ValueDefinition(indirectCalleePtr))
+		callInd := m.allocateInstr().asCallIndirect(ptrOp, calleeABI)
+		m.insert(callInd)
+	}
+
+	// If this is a proper tail call, returns will be cleared in the postRegAlloc phase.
+	m.insertReturns(si, calleeABI, stackSlotSize)
+}
+
 // callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the
 // caller side of the function call.
 func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def backend.SSAValueDefinition, stackSlotSize int64) {
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go
index e53729860..fa3ca58a6 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go
@@ -188,6 +188,23 @@ func (m *machine) postRegAlloc() {
 				linkInstr(inc, next)
 			}
 			continue
+		case tailCall, tailCallIndirect:
+			// At this point, reg alloc is done, therefore we can safely insert a dec RSP instruction
+			// right before the tail call (jump) instruction. If this is done before reg alloc, the stack slot
+			// can point to the wrong location and therefore results in a wrong value.
+			tailCall := cur
+			_, _, _, _, size := backend.ABIInfoFromUint64(tailCall.u2)
+			if size > 0 {
+				dec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(size), rspVReg, true)
+				linkInstr(tailCall.prev, dec)
+				linkInstr(dec, tailCall)
+			}
+			// In a tail call, we insert the epilogue before the jump instruction.
+			m.setupEpilogueAfter(tailCall.prev)
+			// If this has been encoded as a proper tail call, we can remove the trailing instructions.
+			// For details, see internal/engine/RATIONALE.md
+			m.removeUntilRet(cur.next)
+			continue
 		}
 
 		// Removes the redundant copy instruction.
@@ -278,6 +295,20 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {
 	linkInstr(cur, prevNext)
 }
 
+// removeUntilRet removes the instructions starting from `cur` until the first `ret` instruction.
+func (m *machine) removeUntilRet(cur *instruction) {
+	for ; cur != nil; cur = cur.next {
+		prev, next := cur.prev, cur.next
+		prev.next = next
+		if next != nil {
+			next.prev = prev
+		}
+		if cur.kind == ret {
+			return
+		}
+	}
+}
+
 func (m *machine) addRSP(offset int32, cur *instruction) *instruction {
 	if offset == 0 {
 		return cur
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go
index d1eaa7cd4..c300c3d61 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go
@@ -261,6 +261,23 @@ func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn reg
 
 func (m *machine) lowerCall(si *ssa.Instruction) {
 	isDirectCall := si.Opcode() == ssa.OpcodeCall
+	indirectCalleePtr, directCallee, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall)
+
+	if isDirectCall {
+		call := m.allocateInstr()
+		call.asCall(directCallee, calleeABI)
+		m.insert(call)
+	} else {
+		ptr := m.compiler.VRegOf(indirectCalleePtr)
+		callInd := m.allocateInstr()
+		callInd.asCallIndirect(ptr, calleeABI)
+		m.insert(callInd)
+	}
+
+	m.insertReturns(si, calleeABI, stackSlotSize)
+}
+
+func (m *machine) prepareCall(si *ssa.Instruction, isDirectCall bool) (ssa.Value, ssa.FuncRef, *backend.FunctionABI, int64) {
 	var indirectCalleePtr ssa.Value
 	var directCallee ssa.FuncRef
 	var sigID ssa.SignatureID
@@ -282,18 +299,10 @@ func (m *machine) lowerCall(si *ssa.Instruction) {
 		def := m.compiler.ValueDefinition(arg)
 		m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
 	}
+	return indirectCalleePtr, directCallee, calleeABI, stackSlotSize
+}
 
-	if isDirectCall {
-		call := m.allocateInstr()
-		call.asCall(directCallee, calleeABI)
-		m.insert(call)
-	} else {
-		ptr := m.compiler.VRegOf(indirectCalleePtr)
-		callInd := m.allocateInstr()
-		callInd.asCallIndirect(ptr, calleeABI)
-		m.insert(callInd)
-	}
-
+func (m *machine) insertReturns(si *ssa.Instruction, calleeABI *backend.FunctionABI, stackSlotSize int64) {
 	var index int
 	r1, rs := si.Returns()
 	if r1.Valid() {
@@ -307,6 +316,40 @@
 	}
 }
 
+func (m *machine) lowerTailCall(si *ssa.Instruction) {
+	isDirectCall := si.Opcode() == ssa.OpcodeTailCallReturnCall
+	indirectCalleePtr, directCallee, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall)
+
+	// We currently support tail calls only when the args are passed via registers;
+	// otherwise we fall back to a plain call.
+	// For details, see internal/engine/RATIONALE.md
+	isAllRegs := stackSlotSize == 0
+
+	switch {
+	case isDirectCall && isAllRegs:
+		tailJump := m.allocateInstr()
+		tailJump.asTailCall(directCallee, calleeABI)
+		m.insert(tailJump)
+	case !isDirectCall && isAllRegs:
+		ptr := m.compiler.VRegOf(indirectCalleePtr)
+		callInd := m.allocateInstr()
+		callInd.asTailCallIndirect(ptr, calleeABI)
+		m.insert(callInd)
+	case isDirectCall && !isAllRegs:
+		tailJump := m.allocateInstr()
+		tailJump.asCall(directCallee, calleeABI)
+		m.insert(tailJump)
+	case !isDirectCall && !isAllRegs:
+		ptr := m.compiler.VRegOf(indirectCalleePtr)
+		callInd := m.allocateInstr()
+		callInd.asCallIndirect(ptr, calleeABI)
+		m.insert(callInd)
+	}
+
+	// If this is a proper tail call, returns will be cleared in the postRegAlloc phase.
+	m.insertReturns(si, calleeABI, stackSlotSize)
+}
+
 func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add bool) {
 	if imm12Operand, ok := asImm12Operand(uint64(diff)); ok {
 		alu := m.allocateInstr()
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go
index 1f563428a..560044673 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go
@@ -140,6 +140,8 @@ var defKinds = [numInstructionKinds]defKind{
 	atomicStore:       defKindNone,
 	dmb:               defKindNone,
 	loadConstBlockArg: defKindRD,
+	tailCall:          defKindCall,
+	tailCallInd:       defKindCall,
 }
 
 // Defs returns the list of regalloc.VReg that are defined by the instruction.
@@ -278,6 +280,8 @@ var useKinds = [numInstructionKinds]useKind{
 	atomicStore:       useKindRNRM,
 	loadConstBlockArg: useKindNone,
 	dmb:               useKindNone,
+	tailCall:          useKindCall,
+	tailCallInd:       useKindCallInd,
 }
 
 // Uses returns the list of regalloc.VReg that are used by the instruction.
@@ -1501,6 +1505,10 @@ func (i *instruction) String() (str string) {
 		str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64))
 	case dmb:
 		str = "dmb"
+	case tailCall:
+		str = fmt.Sprintf("b %s", ssa.FuncRef(i.u1))
+	case tailCallInd:
+		str = fmt.Sprintf("b %s", formatVRegSized(i.rn.nr(), 64))
 	case udf:
 		str = "udf"
 	case emitSourceOffsetInfo:
@@ -1550,6 +1558,22 @@ func (i *instruction) asDMB() {
 	i.kind = dmb
 }
 
+func (i *instruction) asTailCall(ref ssa.FuncRef, abi *backend.FunctionABI) {
+	i.kind = tailCall
+	i.u1 = uint64(ref)
+	if abi != nil {
+		i.u2 = abi.ABIInfoAsUint64()
+	}
+}
+
+func (i *instruction) asTailCallIndirect(ptr regalloc.VReg, abi *backend.FunctionABI) {
+	i.kind = tailCallInd
+	i.rn = operandNR(ptr)
+	if abi != nil {
+		i.u2 = abi.ABIInfoAsUint64()
+	}
+}
+
 // TODO: delete unnecessary things.
 const (
 	// nop0 represents a no-op of zero size.
@@ -1727,6 +1751,10 @@ const (
 	atomicStore
 	// dmb represents the data memory barrier instruction in inner-shareable (ish) mode.
 	dmb
+	// tailCall represents a tail call instruction.
+	tailCall
+	// tailCallInd represents a tail call indirect instruction.
+	tailCallInd
 	// UDF is the undefined instruction. For debugging only.
 	udf
 	// loadConstBlockArg represents a load of a constant block argument.
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
index 21be9b71e..5326a5e28 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
@@ -39,7 +39,7 @@ func (i *instruction) encode(m *machine) {
 		c.Emit4Bytes(encodeUnconditionalBranch(false, imm))
 	case call:
 		// We still don't know the exact address of the function to call, so we emit a placeholder.
-		c.AddRelocationInfo(i.callFuncRef())
+		c.AddRelocationInfo(i.callFuncRef(), false)
 		c.Emit4Bytes(encodeUnconditionalBranch(true, 0)) // 0 = placeholder
 	case callInd:
 		c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true))
@@ -417,6 +417,12 @@ func (i *instruction) encode(m *machine) {
 		))
 	case dmb:
 		c.Emit4Bytes(encodeDMB())
+	case tailCall:
+		// We still don't know the exact address of the function to call, so we emit a placeholder.
+		c.AddRelocationInfo(i.callFuncRef(), true) // true = IsTailCall
+		c.Emit4Bytes(encodeUnconditionalBranch(false, 0)) // 0 = placeholder
+	case tailCallInd:
+		c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], false))
 	default:
 		panic(i.String())
 	}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
index f9df356c0..190bc6014 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
@@ -788,6 +788,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		instr.asDMB()
 		m.insert(instr)
 
+	case ssa.OpcodeTailCallReturnCall, ssa.OpcodeTailCallReturnCallIndirect:
+		m.lowerTailCall(instr)
+
 	default:
 		panic("TODO: lowering " + op.String())
 	}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
index c646a8fab..16d0746e5 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
@@ -198,6 +198,11 @@ func (m *machine) postRegAlloc() {
 		switch cur.kind {
 		case ret:
 			m.setupEpilogueAfter(cur.prev)
+		case tailCall, tailCallInd:
+			m.setupEpilogueAfter(cur.prev)
+			// If this has been encoded as a proper tail call, we can remove the trailing instructions.
+			// For details, see internal/engine/RATIONALE.md
+			m.removeUntilRet(cur.next)
 		case loadConstBlockArg:
 			lc := cur
 			next := lc.next
@@ -325,6 +330,20 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {
 	linkInstr(cur, prevNext)
 }
 
+// removeUntilRet removes the instructions starting from `cur` until the first `ret` instruction.
+func (m *machine) removeUntilRet(cur *instruction) {
+	for ; cur != nil; cur = cur.next {
+		prev, next := cur.prev, cur.next
+		prev.next = next
+		if next != nil {
+			next.prev = prev
+		}
+		if cur.kind == ret {
+			return
+		}
+	}
+}
+
 // saveRequiredRegs is the set of registers that must be saved/restored during growing stack when there's insufficient
 // stack space left. Basically this is the combination of CalleeSavedRegisters plus argument registers except for x0,
 // which always points to the execution context whenever the native code is entered from Go.
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go
index 932fe842b..9bb4dee15 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go
@@ -59,13 +59,19 @@ func (m *machine) ResolveRelocations(
 		if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset {
 			// Find the near trampoline island from callTrampolineIslandOffsets.
 			islandOffset := searchTrampolineIsland(callTrampolineIslandOffsets, int(instrOffset))
-			islandTargetOffset := islandOffset + trampolineCallSize*int(r.FuncRef)
+			// Imported functions don't need trampolines, so we ignore them when we compute the offset
+			// (see also encodeCallTrampolineIsland).
+			funcOffset := int(r.FuncRef) - importedFns
+			islandTargetOffset := islandOffset + trampolineCallSize*funcOffset
 			diff = int64(islandTargetOffset) - (instrOffset)
 			if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset {
 				panic("BUG in trampoline placement")
 			}
 		}
-		binary.LittleEndian.PutUint32(executable[instrOffset:instrOffset+4], encodeUnconditionalBranch(true, diff))
+		// The unconditional branch instruction is usually encoded as a branch-and-link (BL),
+		// because it is a function call. However, if the instruction is a tail call,
+		// we encode it as a plain unconditional branch (B), so we won't overwrite the link register.
+		binary.LittleEndian.PutUint32(executable[instrOffset:instrOffset+4], encodeUnconditionalBranch(!r.IsTailCall, diff))
 	}
 }
