Diffstat (limited to 'vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend')
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go                        |  11
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go                 |  54
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go        |  33
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go               | 101
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go |  31
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go                   |  65
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go                 |  28
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go        |   8
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go           |   3
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go |  19
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go    |  10
11 files changed, 315 insertions, 48 deletions
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go
index 62d365015..8e3f08efc 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go
@@ -88,7 +88,7 @@ type Compiler interface {
MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode
// AddRelocationInfo appends the relocation information for the function reference at the current buffer offset.
- AddRelocationInfo(funcRef ssa.FuncRef)
+ AddRelocationInfo(funcRef ssa.FuncRef, isTailCall bool)
// AddSourceOffsetInfo appends the source offset information for the given offset.
AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset)
@@ -115,6 +115,8 @@ type RelocationInfo struct {
Offset int64
// Target is the target function of the call instruction.
FuncRef ssa.FuncRef
+ // IsTailCall indicates whether the call instruction is a tail call.
+ IsTailCall bool
}
// compiler implements Compiler.
@@ -352,10 +354,11 @@ func (c *compiler) SourceOffsetInfo() []SourceOffsetInfo {
}
// AddRelocationInfo implements Compiler.AddRelocationInfo.
-func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef) {
+func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef, isTailCall bool) {
c.relocations = append(c.relocations, RelocationInfo{
- Offset: int64(len(c.buf)),
- FuncRef: funcRef,
+ Offset: int64(len(c.buf)),
+ FuncRef: funcRef,
+ IsTailCall: isTailCall,
})
}
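
The new IsTailCall flag travels with each RelocationInfo so that the per-ISA relocation pass can tell calls and tail-call jumps apart. A minimal sketch (not wazero's actual resolver; executable, relocs, and funcBase are illustrative names, and the usual encoding/binary and ssa imports are assumed) of how an amd64-style resolver could consume it — both call (0xe8) and jmp (0xe9) take the same rel32 displacement, so the flag only matters on ISAs where the two opcodes diverge:

func patchAmd64Relocations(executable []byte, relocs []RelocationInfo, funcBase map[ssa.FuncRef]int64) {
    for _, r := range relocs {
        // rel32 is relative to the end of the 4-byte field that starts at r.Offset.
        rel := int32(funcBase[r.FuncRef] - (r.Offset + 4))
        binary.LittleEndian.PutUint32(executable[r.Offset:], uint32(rel))
        // r.IsTailCall is not needed for the patch itself here; on arm64 it selects
        // B vs. BL (see ResolveRelocations at the end of this diff).
    }
}
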
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go
index 6a3e58f51..901c87aaf 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go
@@ -21,7 +21,9 @@ type instruction struct {
func (i *instruction) IsCall() bool { return i.kind == call }
// IsIndirectCall implements regalloc.Instr.
-func (i *instruction) IsIndirectCall() bool { return i.kind == callIndirect }
+func (i *instruction) IsIndirectCall() bool {
+ return i.kind == callIndirect
+}
// IsReturn implements regalloc.Instr.
func (i *instruction) IsReturn() bool { return i.kind == ret }
@@ -288,6 +290,11 @@ func (i *instruction) String() string {
case nopUseReg:
return fmt.Sprintf("nop_use_reg %s", i.op1.format(true))
+ case tailCall:
+ return fmt.Sprintf("tailCall %s", ssa.FuncRef(i.u1))
+ case tailCallIndirect:
+ return fmt.Sprintf("tailCallIndirect %s", i.op1.format(true))
+
default:
panic(fmt.Sprintf("BUG: %d", int(i.kind)))
}
@@ -357,7 +364,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
default:
panic(fmt.Sprintf("BUG: invalid operand: %s", i))
}
- case useKindCallInd:
+ case useKindCallInd, useKindTailCallInd:
op := i.op1
switch op.kind {
case operandKindReg:
@@ -428,13 +435,16 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
func (i *instruction) AssignUse(index int, v regalloc.VReg) {
switch uk := useKinds[i.kind]; uk {
case useKindNone:
- case useKindCallInd:
+ case useKindCallInd, useKindTailCallInd:
if index != 0 {
panic("BUG")
}
op := &i.op1
switch op.kind {
case operandKindReg:
+ if uk == useKindTailCallInd && v != r11VReg {
+ panic("BUG")
+ }
op.setReg(v)
case operandKindMem:
op.addressMode().assignUses(index, v)
@@ -838,6 +848,12 @@ const (
// nopUseReg is a meta instruction that uses one register and does nothing.
nopUseReg
+ // tailCall is a meta instruction that emits a tail call.
+ tailCall
+
+ // tailCallIndirect is a meta instruction that emits a tail call through an indirect jump.
+ tailCallIndirect
+
instrMax
)
@@ -1079,6 +1095,10 @@ func (k instructionKind) String() string {
return "lockcmpxchg"
case lockxadd:
return "lockxadd"
+ case tailCall:
+ return "tailCall"
+ case tailCallIndirect:
+ return "tailCallIndirect"
default:
panic("BUG")
}
@@ -1173,6 +1193,27 @@ func (i *instruction) asCallIndirect(ptr operand, abi *backend.FunctionABI) *ins
return i
}
+func (i *instruction) asTailCallReturnCall(ref ssa.FuncRef, abi *backend.FunctionABI) *instruction {
+ i.kind = tailCall
+ i.u1 = uint64(ref)
+ if abi != nil {
+ i.u2 = abi.ABIInfoAsUint64()
+ }
+ return i
+}
+
+func (i *instruction) asTailCallReturnCallIndirect(ptr operand, abi *backend.FunctionABI) *instruction {
+ if ptr.kind != operandKindReg && ptr.kind != operandKindMem {
+ panic("BUG")
+ }
+ i.kind = tailCallIndirect
+ i.op1 = ptr
+ if abi != nil {
+ i.u2 = abi.ABIInfoAsUint64()
+ }
+ return i
+}
+
func (i *instruction) asRet() *instruction {
i.kind = ret
return i
@@ -2342,6 +2383,8 @@ var defKinds = [instrMax]defKind{
lockxadd: defKindNone,
neg: defKindNone,
nopUseReg: defKindNone,
+ tailCall: defKindCall,
+ tailCallIndirect: defKindCall,
}
// String implements fmt.Stringer.
@@ -2375,6 +2418,7 @@ const (
useKindBlendvpd
useKindCall
useKindCallInd
+ useKindTailCallInd
useKindFcvtToSintSequence
useKindFcvtToUintSequence
)
@@ -2425,6 +2469,8 @@ var useKinds = [instrMax]useKind{
lockxadd: useKindOp1RegOp2,
neg: useKindOp1,
nopUseReg: useKindOp1,
+ tailCall: useKindCall,
+ tailCallIndirect: useKindTailCallInd,
}
func (u useKind) String() string {
@@ -2441,6 +2487,8 @@ func (u useKind) String() string {
return "call"
case useKindCallInd:
return "callInd"
+ case useKindTailCallInd:
+ return "tailCallInd"
default:
return "invalid"
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go
index 6637b428c..d1eefbdb5 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go
@@ -1211,7 +1211,7 @@ func (i *instruction) encode(c backend.Compiler) (needsLabelResolution bool) {
case call:
c.EmitByte(0xe8)
// Meaning that the call target is a function value, and requires relocation.
- c.AddRelocationInfo(ssa.FuncRef(i.u1))
+ c.AddRelocationInfo(ssa.FuncRef(i.u1), false)
// Note that this is zero as a placeholder for the call target if it's a function value.
c.Emit4Bytes(uint32(i.u2))
@@ -1244,6 +1244,37 @@ func (i *instruction) encode(c backend.Compiler) (needsLabelResolution bool) {
panic("BUG: invalid operand kind")
}
+ case tailCall:
+ // Encode as jmp.
+ c.EmitByte(0xe9)
+ // Meaning that the call target is a function value, and requires relocation.
+ c.AddRelocationInfo(ssa.FuncRef(i.u1), true)
+ // Note that this is zero as a placeholder for the call target if it's a function value.
+ c.Emit4Bytes(uint32(i.u2))
+
+ case tailCallIndirect:
+ op := i.op1
+
+ const opcodeNum = 1
+ const opcode = 0xff
+ const regMemSubOpcode = 4
+ rex := rexInfo(0).clearW()
+ switch op.kind {
+ // Indirect tail calls always take a register as the target.
+ // Note: the register must be a caller-saved register (usually r11), so the epilogue cannot clobber it.
+ case operandKindReg:
+ dst := regEncodings[op.reg().RealReg()]
+ encodeRegReg(c,
+ legacyPrefixesNone,
+ opcode, opcodeNum,
+ regMemSubOpcode,
+ dst,
+ rex,
+ )
+ default:
+ panic("BUG: invalid operand kind")
+ }
+
case xchg:
src, dst := regEncodings[i.op1.reg().RealReg()], i.op2
size := i.u1
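
For reference, the two tail-call shapes above boil down to the following byte patterns (a standalone sketch of the assumed x86-64 encodings, not code from this change):

// Direct tail call: jmp rel32 (0xe9 followed by a 4-byte displacement, left as a
// placeholder until relocation).
var directTailCall = []byte{0xe9, 0x00, 0x00, 0x00, 0x00}

// Indirect tail call through r11: REX.B + 0xff /4 (jmp r/m64).
// ModRM = 0b11_100_011: mod=11 (register direct), reg=100 (/4 sub-opcode), rm=011 (r11).
var indirectTailCall = []byte{0x41, 0xff, 0xe3}
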
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go
index fd0d69ca9..57d9bb731 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go
@@ -17,7 +17,7 @@ import (
// NewBackend returns a new backend for amd64.
func NewBackend() backend.Machine {
m := &machine{
- cpuFeatures: platform.CpuFeatures,
+ cpuFeatures: platform.CpuFeatures(),
regAlloc: regalloc.NewAllocator[*instruction, *labelPosition, *regAllocFn](regInfo),
spillSlots: map[regalloc.VRegID]int64{},
amodePool: wazevoapi.NewPool[amode](nil),
@@ -1109,6 +1109,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
atomicOp, size := instr.AtomicRmwData()
m.lowerAtomicRmw(atomicOp, addr, val, size, instr.Return())
+ case ssa.OpcodeTailCallReturnCall, ssa.OpcodeTailCallReturnCallIndirect:
+ m.lowerTailCall(instr)
+
default:
panic("TODO: lowering " + op.String())
}
@@ -1885,31 +1888,7 @@ func (m *machine) lowerStore(si *ssa.Instruction) {
func (m *machine) lowerCall(si *ssa.Instruction) {
isDirectCall := si.Opcode() == ssa.OpcodeCall
- var indirectCalleePtr ssa.Value
- var directCallee ssa.FuncRef
- var sigID ssa.SignatureID
- var args []ssa.Value
- var isMemmove bool
- if isDirectCall {
- directCallee, sigID, args = si.CallData()
- } else {
- indirectCalleePtr, sigID, args, isMemmove = si.CallIndirectData()
- }
- calleeABI := m.c.GetFunctionABI(m.c.SSABuilder().ResolveSignature(sigID))
-
- stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize())
- if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
- m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // 16 == return address + RBP.
- }
-
- // Note: See machine.SetupPrologue for the stack layout.
- // The stack pointer decrease/increase will be inserted later in the compilation.
-
- for i, arg := range args {
- reg := m.c.VRegOf(arg)
- def := m.c.ValueDefinition(arg)
- m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
- }
+ indirectCalleePtr, directCallee, isMemmove, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall)
if isMemmove {
// Go's memmove *might* use all xmm0-xmm15, so we need to release them.
@@ -1939,6 +1918,39 @@ func (m *machine) lowerCall(si *ssa.Instruction) {
m.insert(m.allocateInstr().asNopUseReg(regInfo.RealRegToVReg[rdx]))
}
+ m.insertReturns(si, calleeABI, stackSlotSize)
+}
+
+func (m *machine) prepareCall(si *ssa.Instruction, isDirectCall bool) (ssa.Value, ssa.FuncRef, bool, *backend.FunctionABI, int64) {
+ var indirectCalleePtr ssa.Value
+ var directCallee ssa.FuncRef
+ var sigID ssa.SignatureID
+ var args []ssa.Value
+ var isMemmove bool
+ if isDirectCall {
+ directCallee, sigID, args = si.CallData()
+ } else {
+ indirectCalleePtr, sigID, args, isMemmove = si.CallIndirectData()
+ }
+ calleeABI := m.c.GetFunctionABI(m.c.SSABuilder().ResolveSignature(sigID))
+
+ stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize())
+ if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
+ m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // 16 == return address + RBP.
+ }
+
+ // Note: See machine.SetupPrologue for the stack layout.
+ // The stack pointer decrease/increase will be inserted later in the compilation.
+
+ for i, arg := range args {
+ reg := m.c.VRegOf(arg)
+ def := m.c.ValueDefinition(arg)
+ m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
+ }
+ return indirectCalleePtr, directCallee, isMemmove, calleeABI, stackSlotSize
+}
+
+func (m *machine) insertReturns(si *ssa.Instruction, calleeABI *backend.FunctionABI, stackSlotSize int64) {
var index int
r1, rs := si.Returns()
if r1.Valid() {
@@ -1952,6 +1964,43 @@ func (m *machine) lowerCall(si *ssa.Instruction) {
}
}
+func (m *machine) lowerTailCall(si *ssa.Instruction) {
+ isDirectCall := si.Opcode() == ssa.OpcodeTailCallReturnCall
+ indirectCalleePtr, directCallee, isMemmove, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall)
+ if isMemmove {
+ panic("memmove not supported in tail calls")
+ }
+
+ isAllRegs := stackSlotSize == 0
+
+ switch {
+ case isDirectCall && isAllRegs:
+ call := m.allocateInstr().asTailCallReturnCall(directCallee, calleeABI)
+ m.insert(call)
+ case !isDirectCall && isAllRegs:
+ // In a tail call, the epilogue is inserted before the jump instruction,
+ // so a callee-saved register holding the target could be clobbered while the stack is being restored.
+ // Therefore, unlike a regular indirect call, we make sure the pointer lives
+ // in a caller-saved register (r11).
+ // For details, see internal/engine/RATIONALE.md
+ ptrOp := m.getOperand_Reg(m.c.ValueDefinition(indirectCalleePtr))
+ tmpJmp := r11VReg
+ m.InsertMove(tmpJmp, ptrOp.reg(), ssa.TypeI64)
+ callInd := m.allocateInstr().asTailCallReturnCallIndirect(newOperandReg(tmpJmp), calleeABI)
+ m.insert(callInd)
+ case isDirectCall && !isAllRegs:
+ call := m.allocateInstr().asCall(directCallee, calleeABI)
+ m.insert(call)
+ case !isDirectCall && !isAllRegs:
+ ptrOp := m.getOperand_Mem_Reg(m.c.ValueDefinition(indirectCalleePtr))
+ callInd := m.allocateInstr().asCallIndirect(ptrOp, calleeABI)
+ m.insert(callInd)
+ }
+
+ // If this is a proper tail call, returns will be cleared in the postRegAlloc phase.
+ m.insertReturns(si, calleeABI, stackSlotSize)
+}
+
// callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the
// caller side of the function call.
func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def backend.SSAValueDefinition, stackSlotSize int64) {
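
The r11 choice above is about surviving the epilogue: the epilogue only restores callee-saved registers, so a caller-saved scratch register that is not used for argument passing cannot be overwritten between the restore and the jump. A small illustrative check (the register set is a simplification of the System V convention, not wazero's regInfo tables):

// calleeSavedSysV lists the integer registers the System V AMD64 ABI requires
// the callee to preserve; anything outside this set is untouched by the epilogue.
var calleeSavedSysV = map[string]bool{
    "rbx": true, "rbp": true, "r12": true, "r13": true, "r14": true, "r15": true,
}

// survivesEpilogue reports whether a register holding the indirect tail-call
// target is left intact by an epilogue that only restores callee-saved registers.
func survivesEpilogue(reg string) bool { return !calleeSavedSysV[reg] }

// survivesEpilogue("r11") == true, which is why lowerTailCall pins the pointer there.
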
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go
index e53729860..fa3ca58a6 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go
@@ -188,6 +188,23 @@ func (m *machine) postRegAlloc() {
linkInstr(inc, next)
}
continue
+ case tailCall, tailCallIndirect:
+ // At this point, reg alloc is done, therefore we can safely insert the RSP-decrementing instruction
+ // right before the tail call (jump) instruction. If this were done before reg alloc, a stack slot
+ // could point to the wrong location and therefore yield a wrong value.
+ tailCall := cur
+ _, _, _, _, size := backend.ABIInfoFromUint64(tailCall.u2)
+ if size > 0 {
+ dec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(size), rspVReg, true)
+ linkInstr(tailCall.prev, dec)
+ linkInstr(dec, tailCall)
+ }
+ // In a tail call, we insert the epilogue before the jump instruction.
+ m.setupEpilogueAfter(tailCall.prev)
+ // If this has been encoded as a proper tail call, we can remove the trailing instructions.
+ // For details, see internal/engine/RATIONALE.md
+ m.removeUntilRet(cur.next)
+ continue
}
// Removes the redundant copy instruction.
@@ -278,6 +295,20 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {
linkInstr(cur, prevNext)
}
+// removeUntilRet removes the instructions starting from `cur` until the first `ret` instruction.
+func (m *machine) removeUntilRet(cur *instruction) {
+ for ; cur != nil; cur = cur.next {
+ prev, next := cur.prev, cur.next
+ prev.next = next
+ if next != nil {
+ next.prev = prev
+ }
+ if cur.kind == ret {
+ return
+ }
+ }
+}
+
func (m *machine) addRSP(offset int32, cur *instruction) *instruction {
if offset == 0 {
return cur
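
removeUntilRet splices nodes out of the doubly-linked instruction list one by one, including the terminating ret itself. The same pattern on a toy node type (self-contained sketch, not wazero code; cur is assumed to have a predecessor, the tail-call jump, just as in postRegAlloc above):

type node struct {
    kind       string
    prev, next *node
}

// removeUntilRetSketch unlinks every node from cur up to and including the
// first "ret"; the node before cur ends up linked to whatever follows the ret.
func removeUntilRetSketch(cur *node) {
    for ; cur != nil; cur = cur.next {
        prev, next := cur.prev, cur.next
        prev.next = next
        if next != nil {
            next.prev = prev
        }
        if cur.kind == "ret" {
            return
        }
    }
}
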
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go
index d1eaa7cd4..c300c3d61 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go
@@ -261,6 +261,23 @@ func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn reg
func (m *machine) lowerCall(si *ssa.Instruction) {
isDirectCall := si.Opcode() == ssa.OpcodeCall
+ indirectCalleePtr, directCallee, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall)
+
+ if isDirectCall {
+ call := m.allocateInstr()
+ call.asCall(directCallee, calleeABI)
+ m.insert(call)
+ } else {
+ ptr := m.compiler.VRegOf(indirectCalleePtr)
+ callInd := m.allocateInstr()
+ callInd.asCallIndirect(ptr, calleeABI)
+ m.insert(callInd)
+ }
+
+ m.insertReturns(si, calleeABI, stackSlotSize)
+}
+
+func (m *machine) prepareCall(si *ssa.Instruction, isDirectCall bool) (ssa.Value, ssa.FuncRef, *backend.FunctionABI, int64) {
var indirectCalleePtr ssa.Value
var directCallee ssa.FuncRef
var sigID ssa.SignatureID
@@ -282,18 +299,10 @@ func (m *machine) lowerCall(si *ssa.Instruction) {
def := m.compiler.ValueDefinition(arg)
m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
}
+ return indirectCalleePtr, directCallee, calleeABI, stackSlotSize
+}
- if isDirectCall {
- call := m.allocateInstr()
- call.asCall(directCallee, calleeABI)
- m.insert(call)
- } else {
- ptr := m.compiler.VRegOf(indirectCalleePtr)
- callInd := m.allocateInstr()
- callInd.asCallIndirect(ptr, calleeABI)
- m.insert(callInd)
- }
-
+func (m *machine) insertReturns(si *ssa.Instruction, calleeABI *backend.FunctionABI, stackSlotSize int64) {
var index int
r1, rs := si.Returns()
if r1.Valid() {
@@ -307,6 +316,40 @@ func (m *machine) lowerCall(si *ssa.Instruction) {
}
}
+func (m *machine) lowerTailCall(si *ssa.Instruction) {
+ isDirectCall := si.Opcode() == ssa.OpcodeTailCallReturnCall
+ indirectCalleePtr, directCallee, calleeABI, stackSlotSize := m.prepareCall(si, isDirectCall)
+
+ // We currently support tail calls only when the args are passed via registers
+ // otherwise we fall back to a plain call.
+ // For details, see internal/engine/RATIONALE.md
+ isAllRegs := stackSlotSize == 0
+
+ switch {
+ case isDirectCall && isAllRegs:
+ tailJump := m.allocateInstr()
+ tailJump.asTailCall(directCallee, calleeABI)
+ m.insert(tailJump)
+ case !isDirectCall && isAllRegs:
+ ptr := m.compiler.VRegOf(indirectCalleePtr)
+ callInd := m.allocateInstr()
+ callInd.asTailCallIndirect(ptr, calleeABI)
+ m.insert(callInd)
+ case isDirectCall && !isAllRegs:
+ tailJump := m.allocateInstr()
+ tailJump.asCall(directCallee, calleeABI)
+ m.insert(tailJump)
+ case !isDirectCall && !isAllRegs:
+ ptr := m.compiler.VRegOf(indirectCalleePtr)
+ callInd := m.allocateInstr()
+ callInd.asCallIndirect(ptr, calleeABI)
+ m.insert(callInd)
+ }
+
+ // If this is a proper tail call, returns will be cleared in the postRegAlloc phase.
+ m.insertReturns(si, calleeABI, stackSlotSize)
+}
+
func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add bool) {
if imm12Operand, ok := asImm12Operand(uint64(diff)); ok {
alu := m.allocateInstr()
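
As on amd64, the decision above collapses to whether the callee's ABI needs any argument/result stack space at all. A one-line restatement (illustrative helper name, assuming the backend.FunctionABI shown in this diff):

func lowersToRealTailCall(calleeABI *backend.FunctionABI) bool {
    // Equivalent to stackSlotSize == 0 in lowerTailCall; only then is a
    // genuine tail-call branch emitted instead of the plain-call fallback.
    return calleeABI.AlignedArgResultStackSlotSize() == 0
}
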
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go
index 1f563428a..560044673 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go
@@ -140,6 +140,8 @@ var defKinds = [numInstructionKinds]defKind{
atomicStore: defKindNone,
dmb: defKindNone,
loadConstBlockArg: defKindRD,
+ tailCall: defKindCall,
+ tailCallInd: defKindCall,
}
// Defs returns the list of regalloc.VReg that are defined by the instruction.
@@ -278,6 +280,8 @@ var useKinds = [numInstructionKinds]useKind{
atomicStore: useKindRNRM,
loadConstBlockArg: useKindNone,
dmb: useKindNone,
+ tailCall: useKindCall,
+ tailCallInd: useKindCallInd,
}
// Uses returns the list of regalloc.VReg that are used by the instruction.
@@ -1501,6 +1505,10 @@ func (i *instruction) String() (str string) {
str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64))
case dmb:
str = "dmb"
+ case tailCall:
+ str = fmt.Sprintf("b %s", ssa.FuncRef(i.u1))
+ case tailCallInd:
+ str = fmt.Sprintf("b %s", formatVRegSized(i.rn.nr(), 64))
case udf:
str = "udf"
case emitSourceOffsetInfo:
@@ -1550,6 +1558,22 @@ func (i *instruction) asDMB() {
i.kind = dmb
}
+func (i *instruction) asTailCall(ref ssa.FuncRef, abi *backend.FunctionABI) {
+ i.kind = tailCall
+ i.u1 = uint64(ref)
+ if abi != nil {
+ i.u2 = abi.ABIInfoAsUint64()
+ }
+}
+
+func (i *instruction) asTailCallIndirect(ptr regalloc.VReg, abi *backend.FunctionABI) {
+ i.kind = tailCallInd
+ i.rn = operandNR(ptr)
+ if abi != nil {
+ i.u2 = abi.ABIInfoAsUint64()
+ }
+}
+
// TODO: delete unnecessary things.
const (
// nop0 represents a no-op of zero size.
@@ -1727,6 +1751,10 @@ const (
atomicStore
// dmb represents the data memory barrier instruction in inner-shareable (ish) mode.
dmb
+ // tailCall represents a tail call instruction.
+ tailCall
+ // tailCallInd represents a tail call indirect instruction.
+ tailCallInd
// UDF is the undefined instruction. For debugging only.
udf
// loadConstBlockArg represents a load of a constant block argument.
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
index 21be9b71e..5326a5e28 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
@@ -39,7 +39,7 @@ func (i *instruction) encode(m *machine) {
c.Emit4Bytes(encodeUnconditionalBranch(false, imm))
case call:
// We still don't know the exact address of the function to call, so we emit a placeholder.
- c.AddRelocationInfo(i.callFuncRef())
+ c.AddRelocationInfo(i.callFuncRef(), false)
c.Emit4Bytes(encodeUnconditionalBranch(true, 0)) // 0 = placeholder
case callInd:
c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true))
@@ -417,6 +417,12 @@ func (i *instruction) encode(m *machine) {
))
case dmb:
c.Emit4Bytes(encodeDMB())
+ case tailCall:
+ // We still don't know the exact address of the function to call, so we emit a placeholder.
+ c.AddRelocationInfo(i.callFuncRef(), true) // true = IsTailCall
+ c.Emit4Bytes(encodeUnconditionalBranch(false, 0)) // 0 = placeholder
+ case tailCallInd:
+ c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], false))
default:
panic(i.String())
}
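
On arm64 the only difference between the relocated call and the tail call is the link bit: BL writes the return address to x30, B does not. A sketch of the A64 layout the encoder relies on (assumed constants, mirroring encodeUnconditionalBranch rather than copying it):

// uncondBranch encodes B (link=false) or BL (link=true) with a PC-relative byte
// offset; the immediate is a signed 26-bit count of 4-byte words.
func uncondBranch(link bool, offsetBytes int64) uint32 {
    imm26 := uint32(offsetBytes>>2) & 0x03ff_ffff
    if link {
        return 0x9400_0000 | imm26 // BL
    }
    return 0x1400_0000 | imm26 // B
}

// The register-indirect forms differ the same way: BLR Xn = 0xd63f0000|Rn<<5 versus
// BR Xn = 0xd61f0000|Rn<<5, which is why tailCallInd passes false to encodeUnconditionalBranchReg.
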
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
index f9df356c0..190bc6014 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
@@ -788,6 +788,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
instr.asDMB()
m.insert(instr)
+ case ssa.OpcodeTailCallReturnCall, ssa.OpcodeTailCallReturnCallIndirect:
+ m.lowerTailCall(instr)
+
default:
panic("TODO: lowering " + op.String())
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
index c646a8fab..16d0746e5 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
@@ -198,6 +198,11 @@ func (m *machine) postRegAlloc() {
switch cur.kind {
case ret:
m.setupEpilogueAfter(cur.prev)
+ case tailCall, tailCallInd:
+ m.setupEpilogueAfter(cur.prev)
+ // If this has been encoded as a proper tail call, we can remove the trailing instructions.
+ // For details, see internal/engine/RATIONALE.md
+ m.removeUntilRet(cur.next)
case loadConstBlockArg:
lc := cur
next := lc.next
@@ -325,6 +330,20 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {
linkInstr(cur, prevNext)
}
+// removeUntilRet removes the instructions starting from `cur` until the first `ret` instruction.
+func (m *machine) removeUntilRet(cur *instruction) {
+ for ; cur != nil; cur = cur.next {
+ prev, next := cur.prev, cur.next
+ prev.next = next
+ if next != nil {
+ next.prev = prev
+ }
+ if cur.kind == ret {
+ return
+ }
+ }
+}
+
// saveRequiredRegs is the set of registers that must be saved/restored while growing the stack when there's insufficient
// stack space left. Basically this is the combination of CalleeSavedRegisters plus argument registers except for x0,
// which always points to the execution context whenever the native code is entered from Go.
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go
index 932fe842b..9bb4dee15 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go
@@ -59,13 +59,19 @@ func (m *machine) ResolveRelocations(
if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset {
// Find the near trampoline island from callTrampolineIslandOffsets.
islandOffset := searchTrampolineIsland(callTrampolineIslandOffsets, int(instrOffset))
- islandTargetOffset := islandOffset + trampolineCallSize*int(r.FuncRef)
+ // Imported functions don't need trampolines, so we ignore them when we compute the offset
+ // (see also encodeCallTrampolineIsland)
+ funcOffset := int(r.FuncRef) - importedFns
+ islandTargetOffset := islandOffset + trampolineCallSize*funcOffset
diff = int64(islandTargetOffset) - (instrOffset)
if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset {
panic("BUG in trampoline placement")
}
}
- binary.LittleEndian.PutUint32(executable[instrOffset:instrOffset+4], encodeUnconditionalBranch(true, diff))
+ // The unconditional branch instruction is usually encoded as a branch-and-link (BL),
+ // because it is a function call. However, if the instruction is a tail call,
+ // we encode it as a plain unconditional branch (B), so we won't overwrite the link register.
+ binary.LittleEndian.PutUint32(executable[instrOffset:instrOffset+4], encodeUnconditionalBranch(!r.IsTailCall, diff))
}
}
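
The in-range check that gates the trampoline path corresponds to the reach of a single B/BL instruction: the signed 26-bit word immediate covers roughly ±128 MiB. A hedged sketch of that bound (constant names here are illustrative; wazero's are minUnconditionalBranchOffset and maxUnconditionalBranchOffset):

const (
    maxUncondBranchOffset = (1<<25 - 1) * 4 // largest forward reach of B/BL, in bytes
    minUncondBranchOffset = -(1 << 25) * 4  // largest backward reach, in bytes
)

// needsTrampoline reports whether a direct branch cannot span diff bytes, in which
// case the call is routed through the nearest trampoline island instead.
func needsTrampoline(diff int64) bool {
    return diff < minUncondBranchOffset || diff > maxUncondBranchOffset
}
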